User Inputs

# Unpack knit-time parameters into the top-level variables used by the script.
output.var <- params$output.var           # name of the label column to model

transform.abs <- FALSE                    # absolute-value transform (not parameterized)
log.pred <- params$log.pred               # log10-transform the output variable?
norm.pred <- params$norm.pred             # bestNormalize the output variable?
eda <- params$eda                         # run the (slow) exploratory plots?
algo.forward.caret <- params$algo.forward.caret
algo.backward.caret <- params$algo.backward.caret
algo.stepwise.caret <- params$algo.stepwise.caret
algo.LASSO.caret <- params$algo.LASSO.caret
algo.LARS.caret <- params$algo.LARS.caret
message("Parameters used for training/prediction: ")
## Parameters used for training/prediction:
str(params)
## List of 9
##  $ output.var         : chr "y3"
##  $ log.pred           : logi TRUE
##  $ norm.pred          : logi FALSE
##  $ eda                : logi FALSE
##  $ algo.forward.caret : logi TRUE
##  $ algo.backward.caret: logi TRUE
##  $ algo.stepwise.caret: logi TRUE
##  $ algo.LASSO.caret   : logi TRUE
##  $ algo.LARS.caret    : logi TRUE
# Setup Labels
# Name of the (possibly transformed) output column used for modeling.
# Fix: the original wrote `else output.var.tr = output.var`, an embedded
# assignment that assigned twice; a plain conditional value is equivalent
# and unambiguous.
output.var.tr = if (log.pred == TRUE) paste0(output.var, '.log') else output.var
# output.var.tr = if (log.pred == TRUE)  paste0(output.var,'.cuberoot') else  output.var
# output.var.tr = if (norm.pred == TRUE)  paste0(output.var,'.bestnorm') else  output.var

Loading Data

# Read the feature matrix and the labels, then join them on the JobName key.
# The inner join keeps only jobs present in both files.
feat  = read.csv('../../Data/features_highprec.csv')
labels = read.csv('../../Data/labels.csv')
predictors = names(dplyr::select(feat,-JobName))  # every feature column except the join key
data.ori = inner_join(feat,labels,by='JobName')
#data.ori = inner_join(feat,select_at(labels,c('JobName',output.var)),by='JobName')

Data validation

# Split the joined table into complete and incomplete rows; only rows with no
# NA in any column are used downstream.
cc  = complete.cases(data.ori)
data.notComplete = data.ori[! cc,]
data = data.ori[cc,] %>% select_at(c(predictors,output.var,'JobName'))
message('Original cases: ',nrow(data.ori))
## Original cases: 10000
message('Non-Complete cases: ',nrow(data.notComplete))
## Non-Complete cases: 3020
message('Complete cases: ',nrow(data))
## Complete cases: 6980
summary(dplyr::select_at(data,c('JobName',output.var)))
##       JobName           y3        
##  Job_00001:   1   Min.   : 95.91  
##  Job_00002:   1   1st Qu.:118.29  
##  Job_00003:   1   Median :124.03  
##  Job_00004:   1   Mean   :125.40  
##  Job_00007:   1   3rd Qu.:131.06  
##  Job_00008:   1   Max.   :193.73  
##  (Other)  :6974

Output Variable

The Output Variable y3 shows right skewness, so we will proceed with a log transformation

Histogram

# Density histogram of the raw (untransformed) output variable.
df=gather(select_at(data,output.var))
ggplot(df, aes(x=value)) + 
  geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
  geom_density() 

  #stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))  

QQPlot

# Normal QQ plot of the raw output variable.
ggplot(gather(select_at(data,output.var)), aes(sample=value)) + 
  stat_qq() + 
  facet_wrap(~key, scales = 'free',ncol=4)

Transformation of Output Variable from y3 to y3.log

# Apply the configured transformation to the output variable and compare the
# pre- and post-transformation distributions side by side. Braces make the
# if/else robust to the comment line between the branches.
if (log.pred == TRUE) {
  data[[output.var.tr]] = log(data[[output.var]], 10)
  # alternative: data[[output.var.tr]] = (data[[output.var]])^(1/3)
} else {
  data[[output.var.tr]] = data[[output.var]]
}
df = gather(select_at(data, c(output.var, output.var.tr)))
ggplot(df, aes(value)) + 
  geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
  geom_density() + 
  # stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))  
  facet_wrap(~key, scales = 'free',ncol=2)

ggplot(gather(select_at(data, c(output.var, output.var.tr))), aes(sample=value)) + 
  stat_qq() + 
  facet_wrap(~key, scales = 'free',ncol=4)

Best Normalizer y3

Normalization of y3 using the bestNormalize package (suggested: orderNorm). This is cool, but I think it goes beyond the objective of the project

# Fit and apply the best normalizing transformation (bestNormalize picks among
# several candidates, typically orderNorm) to the output variable.
if (norm.pred == TRUE){
  t=bestNormalize::bestNormalize(data[[output.var]])
  # Fix: print() is required — a bare `t` inside a block is not auto-printed
  print(t)
  qqnorm(data[[output.var]])  # QQ plot before normalization
  qqnorm(predict(t))          # QQ plot after normalization
  data[[output.var.tr]] = predict(t)
}

orderNorm() is a rank-based procedure by which the values of a vector are mapped to their percentile, which is then mapped to the same percentile of the normal distribution. Without the presence of ties, this essentially guarantees that the transformation leads to a uniform distribution

Predictors

Feature Engineering

# Feature Engineering: ratio, log, and inverse-square transforms of the
# controlled x* variables. Replaces 40 copy-paste assignments with data-driven
# loops. The exact column creation order of the original is preserved (note the
# intentional x9-before-x8, x14-before-x13, x11-last ordering), because
# `predictors` is later rebuilt from colnames(data).
ratio.pairs = list(c('x2','x1'), c('x6','x5'), c('x9','x7'), c('x10','x8'),
                   c('x14','x12'), c('x15','x13'), c('x17','x16'),
                   c('x19','x18'), c('x21','x20'), c('x23','x22'))
for (p in ratio.pairs) {
  data[[paste0(p[1], 'by', p[2])]] = data[[p[1]]] / data[[p[2]]]
}

# natural-log transforms
log.vars = c('x1','x2','x5','x6','x7','x9','x8','x10','x12','x14','x13',
             'x15','x16','x17','x18','x19','x20','x21','x22','x23','x11')
for (v in log.vars) {
  data[[paste0(v, 'log')]] = log(data[[v]])
}

# inverse-square transforms
sqinv.vars = c('x1','x5','x7','x8','x12','x13','x16','x18','x20','x22')
for (v in sqinv.vars) {
  data[[paste0(v, 'sqinv')]] = 1 / (data[[v]])^2
}
predictors
##   [1] "x1"      "x2"      "x3"      "x4"      "x5"      "x6"      "x7"      "x8"      "x9"      "x10"     "x11"    
##  [12] "x12"     "x13"     "x14"     "x15"     "x16"     "x17"     "x18"     "x19"     "x20"     "x21"     "x22"    
##  [23] "x23"     "stat1"   "stat2"   "stat3"   "stat4"   "stat5"   "stat6"   "stat7"   "stat8"   "stat9"   "stat10" 
##  [34] "stat11"  "stat12"  "stat13"  "stat14"  "stat15"  "stat16"  "stat17"  "stat18"  "stat19"  "stat20"  "stat21" 
##  [45] "stat22"  "stat23"  "stat24"  "stat25"  "stat26"  "stat27"  "stat28"  "stat29"  "stat30"  "stat31"  "stat32" 
##  [56] "stat33"  "stat34"  "stat35"  "stat36"  "stat37"  "stat38"  "stat39"  "stat40"  "stat41"  "stat42"  "stat43" 
##  [67] "stat44"  "stat45"  "stat46"  "stat47"  "stat48"  "stat49"  "stat50"  "stat51"  "stat52"  "stat53"  "stat54" 
##  [78] "stat55"  "stat56"  "stat57"  "stat58"  "stat59"  "stat60"  "stat61"  "stat62"  "stat63"  "stat64"  "stat65" 
##  [89] "stat66"  "stat67"  "stat68"  "stat69"  "stat70"  "stat71"  "stat72"  "stat73"  "stat74"  "stat75"  "stat76" 
## [100] "stat77"  "stat78"  "stat79"  "stat80"  "stat81"  "stat82"  "stat83"  "stat84"  "stat85"  "stat86"  "stat87" 
## [111] "stat88"  "stat89"  "stat90"  "stat91"  "stat92"  "stat93"  "stat94"  "stat95"  "stat96"  "stat97"  "stat98" 
## [122] "stat99"  "stat100" "stat101" "stat102" "stat103" "stat104" "stat105" "stat106" "stat107" "stat108" "stat109"
## [133] "stat110" "stat111" "stat112" "stat113" "stat114" "stat115" "stat116" "stat117" "stat118" "stat119" "stat120"
## [144] "stat121" "stat122" "stat123" "stat124" "stat125" "stat126" "stat127" "stat128" "stat129" "stat130" "stat131"
## [155] "stat132" "stat133" "stat134" "stat135" "stat136" "stat137" "stat138" "stat139" "stat140" "stat141" "stat142"
## [166] "stat143" "stat144" "stat145" "stat146" "stat147" "stat148" "stat149" "stat150" "stat151" "stat152" "stat153"
## [177] "stat154" "stat155" "stat156" "stat157" "stat158" "stat159" "stat160" "stat161" "stat162" "stat163" "stat164"
## [188] "stat165" "stat166" "stat167" "stat168" "stat169" "stat170" "stat171" "stat172" "stat173" "stat174" "stat175"
## [199] "stat176" "stat177" "stat178" "stat179" "stat180" "stat181" "stat182" "stat183" "stat184" "stat185" "stat186"
## [210] "stat187" "stat188" "stat189" "stat190" "stat191" "stat192" "stat193" "stat194" "stat195" "stat196" "stat197"
## [221] "stat198" "stat199" "stat200" "stat201" "stat202" "stat203" "stat204" "stat205" "stat206" "stat207" "stat208"
## [232] "stat209" "stat210" "stat211" "stat212" "stat213" "stat214" "stat215" "stat216" "stat217"
# Rebuild the predictor list from the current data columns: controlled x*
# variables (raw + engineered) first, then stat* variables.
controlled.vars = grep("^x", colnames(data), value = TRUE)
stat.vars = grep("^stat", colnames(data), value = TRUE)

predictors = c(controlled.vars, stat.vars)
predictors
##   [1] "x1"       "x2"       "x3"       "x4"       "x5"       "x6"       "x7"       "x8"       "x9"       "x10"     
##  [11] "x11"      "x12"      "x13"      "x14"      "x15"      "x16"      "x17"      "x18"      "x19"      "x20"     
##  [21] "x21"      "x22"      "x23"      "x2byx1"   "x6byx5"   "x9byx7"   "x10byx8"  "x14byx12" "x15byx13" "x17byx16"
##  [31] "x19byx18" "x21byx20" "x23byx22" "x1log"    "x2log"    "x5log"    "x6log"    "x7log"    "x9log"    "x8log"   
##  [41] "x10log"   "x12log"   "x14log"   "x13log"   "x15log"   "x16log"   "x17log"   "x18log"   "x19log"   "x20log"  
##  [51] "x21log"   "x22log"   "x23log"   "x11log"   "x1sqinv"  "x5sqinv"  "x7sqinv"  "x8sqinv"  "x12sqinv" "x13sqinv"
##  [61] "x16sqinv" "x18sqinv" "x20sqinv" "x22sqinv" "stat1"    "stat2"    "stat3"    "stat4"    "stat5"    "stat6"   
##  [71] "stat7"    "stat8"    "stat9"    "stat10"   "stat11"   "stat12"   "stat13"   "stat14"   "stat15"   "stat16"  
##  [81] "stat17"   "stat18"   "stat19"   "stat20"   "stat21"   "stat22"   "stat23"   "stat24"   "stat25"   "stat26"  
##  [91] "stat27"   "stat28"   "stat29"   "stat30"   "stat31"   "stat32"   "stat33"   "stat34"   "stat35"   "stat36"  
## [101] "stat37"   "stat38"   "stat39"   "stat40"   "stat41"   "stat42"   "stat43"   "stat44"   "stat45"   "stat46"  
## [111] "stat47"   "stat48"   "stat49"   "stat50"   "stat51"   "stat52"   "stat53"   "stat54"   "stat55"   "stat56"  
## [121] "stat57"   "stat58"   "stat59"   "stat60"   "stat61"   "stat62"   "stat63"   "stat64"   "stat65"   "stat66"  
## [131] "stat67"   "stat68"   "stat69"   "stat70"   "stat71"   "stat72"   "stat73"   "stat74"   "stat75"   "stat76"  
## [141] "stat77"   "stat78"   "stat79"   "stat80"   "stat81"   "stat82"   "stat83"   "stat84"   "stat85"   "stat86"  
## [151] "stat87"   "stat88"   "stat89"   "stat90"   "stat91"   "stat92"   "stat93"   "stat94"   "stat95"   "stat96"  
## [161] "stat97"   "stat98"   "stat99"   "stat100"  "stat101"  "stat102"  "stat103"  "stat104"  "stat105"  "stat106" 
## [171] "stat107"  "stat108"  "stat109"  "stat110"  "stat111"  "stat112"  "stat113"  "stat114"  "stat115"  "stat116" 
## [181] "stat117"  "stat118"  "stat119"  "stat120"  "stat121"  "stat122"  "stat123"  "stat124"  "stat125"  "stat126" 
## [191] "stat127"  "stat128"  "stat129"  "stat130"  "stat131"  "stat132"  "stat133"  "stat134"  "stat135"  "stat136" 
## [201] "stat137"  "stat138"  "stat139"  "stat140"  "stat141"  "stat142"  "stat143"  "stat144"  "stat145"  "stat146" 
## [211] "stat147"  "stat148"  "stat149"  "stat150"  "stat151"  "stat152"  "stat153"  "stat154"  "stat155"  "stat156" 
## [221] "stat157"  "stat158"  "stat159"  "stat160"  "stat161"  "stat162"  "stat163"  "stat164"  "stat165"  "stat166" 
## [231] "stat167"  "stat168"  "stat169"  "stat170"  "stat171"  "stat172"  "stat173"  "stat174"  "stat175"  "stat176" 
## [241] "stat177"  "stat178"  "stat179"  "stat180"  "stat181"  "stat182"  "stat183"  "stat184"  "stat185"  "stat186" 
## [251] "stat187"  "stat188"  "stat189"  "stat190"  "stat191"  "stat192"  "stat193"  "stat194"  "stat195"  "stat196" 
## [261] "stat197"  "stat198"  "stat199"  "stat200"  "stat201"  "stat202"  "stat203"  "stat204"  "stat205"  "stat206" 
## [271] "stat207"  "stat208"  "stat209"  "stat210"  "stat211"  "stat212"  "stat213"  "stat214"  "stat215"  "stat216" 
## [281] "stat217"

All predictors show a Fat-Tail situation, where the two tails are very tall, and a low distribution around the mean. The orderNorm transformation can help (see [Best Normalizator] section)

Interesting Predictors

Histograms

# EDA: histograms and summaries for a hand-picked set of interesting
# predictors. `cols` is also reused by the following scatter-plot chunk.
if (eda == TRUE){
  cols = c('x11','x18','stat98','x7','stat110')
  df=gather(select_at(data,cols))
  # Fix: print() is required — ggplot objects are not auto-printed inside a
  # block (only the final lapply() result was being displayed before)
  print(
    ggplot(df, aes(value)) + 
      geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
      geom_density() + 
      # stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))  
      facet_wrap(~key, scales = 'free',ncol=3)
  )
  
  # ggplot(gather(select_at(data,cols)), aes(sample=value)) + 
  #   stat_qq()+
  #   facet_wrap(~key, scales = 'free',ncol=2)
  
  lapply(select_at(data,cols),summary)
}

Scatter plot vs. output variable y3.log

# EDA: scatter plots of the interesting predictors (`cols`, set in the
# previous chunk) against the transformed output variable. The ggplot is the
# last expression of the block, so it is auto-printed at top level.
if (eda == TRUE){
  d = gather(dplyr::select_at(data,c(cols,output.var.tr)),key=target,value=value,-!!output.var.tr)
  ggplot(data=d, aes_string(x='value',y=output.var.tr)) + 
    geom_point(color='light green',alpha=0.5) + 
    geom_smooth() + 
    facet_wrap(~target, scales = 'free',ncol=3)
}

All Predictors

Histograms

All indicators have a strong indication of Fat-Tails

# EDA: histograms for every predictor. The ggplot is the last expression of
# the block, so it is auto-printed at top level.
if (eda == TRUE){
  df=gather(select_at(data,predictors))
  ggplot(df, aes(value)) + 
    geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
    geom_density() + 
    # stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))  
    facet_wrap(~key, scales = 'free',ncol=4)
}

Correlations

With Output Variable

# EDA: correlation of every predictor with the transformed output variable,
# listed as top positive and top negative correlations.
if (eda == TRUE){
  #chart.Correlation(select(data,-JobName),  pch=21)
  # https://stackoverflow.com/questions/27034655/how-to-use-dplyrarrangedesc-when-using-a-string-as-column-name
  t=as.data.frame(round(cor(dplyr::select(data,-one_of(output.var.tr,'JobName'))
                            ,select_at(data,output.var.tr)),4))  %>%
    #rownames_to_column(var='variable') %>% filter(variable != !!output.var) %>% arrange(-y3.log)
    rownames_to_column(var='variable') %>% filter(variable != !!output.var) %>% arrange(-!!sym(output.var.tr))
  #DT::datatable(t)
  message("Top Positive")
  # Fix: print() is required — inside a block only the last expression is
  # auto-printed, so the first kable() table was silently dropped before
  #kable(head(arrange(t,desc(y3.log)),20))
  print(kable(head(arrange(t,desc(!!sym(output.var.tr))),20)))
  message("Top Negative")
  #kable(head(arrange(t,y3.log),20))
  print(kable(head(arrange(t,!!sym(output.var.tr)),20)))
}

Between All Variables

# EDA: full correlation matrix between all variables; only a 10x10 corner is
# shown (the kable is the last expression of the block, so it auto-prints).
if (eda == TRUE){
  #chart.Correlation(select(data,-JobName),  pch=21)
  t=as.data.frame(round(cor(dplyr::select(data,-one_of('JobName'))),4))
  #DT::datatable(t,options=list(scrollX=T))
  message("Showing only 10 variables")
  kable(t[1:10,1:10])
}

Scatter Plots with Output Variable

Scatter plots with all predictors and the output variable (y3.log)

# EDA: scatter plots of every predictor against the transformed output
# variable (last expression, so the ggplot auto-prints at top level).
if (eda == TRUE){
  d = gather(dplyr::select_at(data,c(predictors,output.var.tr)),key=target,value=value,-!!output.var.tr)
  ggplot(data=d, aes_string(x='value',y=output.var.tr)) + 
    geom_point(color='light blue',alpha=0.5) + 
    geom_smooth() + 
    facet_wrap(~target, scales = 'free',ncol=4)
}

Multicollinearity - VIF

No Multicollinearity among predictors

Showing Top predictor by VIF Value

# EDA: Variance Inflation Factors across all predictors, sorted descending;
# the head() is the last expression of the block, so it auto-prints.
if (eda == TRUE){
  vifDF = usdm::vif(select_at(data,predictors)) %>% arrange(desc(VIF))
  head(vifDF,75)
}

Feature Eng

  • Square Root transformation for x18
# Square-root transform for the right-skewed x18 predictor.
data.tr = mutate(data, x18.sqrt = sqrt(x18))
cols = c('x18', 'x18.sqrt')

Comparing Pre and Post Transformation Density Plots

# ggplot(gather(select_at(data.tr,cols)), aes(value)) + 
#   geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
#   geom_density() + 
#   facet_wrap(~key, scales = 'free',ncol=4)

# Scatter of x18 and x18.sqrt against the transformed output variable.
d = gather(dplyr::select_at(data.tr,c(cols,output.var.tr)),key=target,value=value,-!!output.var.tr)
ggplot(data=d, aes_string(x='value',y=output.var.tr)) + 
  geom_point(color='light blue',alpha=0.5) + 
  geom_smooth() + 
  facet_wrap(~target, scales = 'free',ncol=4)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Removing unwanted variables (JobName is an identifier, not a predictor)
data.tr=data.tr %>%
  #dplyr::select_at(names(data.tr)[! names(data.tr) %in% c('x18','y3','JobName')])
  dplyr::select_at(names(data.tr)[! names(data.tr) %in% c('JobName')])

# From here on `data` is the transformed modeling table and `label.names` the
# (transformed) label column name.
data=data.tr
label.names=output.var.tr

Modeling

PCA

# 0 for no interaction, 
# 1 for Full 2 way interaction and 
# 2 for Selective 2 way interaction
# 3 for Selective 3 way interaction
InteractionMode = 2

# Candidate variables for the PCA: everything except the label column(s).
pca.vars  = names(data)
pca.vars = pca.vars[!pca.vars %in% label.names]


# http://sshaikh.org/2015/05/06/parallelize-machine-learning-in-r-with-multi-core-cpus/
# use 75% of cores only, leave rest for other tasks.
# Fix: detectCores()*0.75 can be fractional (or detectCores() can return NA/1);
# floor() + max() guarantee a valid positive integer worker count.
cl <- makeCluster(max(1, floor(detectCores() * 0.75)))
registerDoParallel(cl)

# Build the PCA input according to InteractionMode and fit it. The formula is
# passed positionally: prcomp() dispatches on its first formal (`x`), and a
# named `formula=` argument does not match it, so positional passing is the
# unambiguous way to reach the formula method.
if(InteractionMode == 1){
  # Full two-way interactions among all candidate variables.
  pca.formula =as.formula(paste0('~(',paste0(pca.vars, collapse ='+'),')^2'))
  pca.model = prcomp(pca.formula,data=data[,pca.vars],center=T,scale.=T,retx = T)
  #saveRDS(pca.model,'pca.model.rds')
}
if (InteractionMode == 0){
  # No interactions: plain PCA on the candidate columns.
  pca.model =  prcomp(x=data[,pca.vars],center=T,scale.=T,retx = T)
}
if (InteractionMode >= 2 & InteractionMode <= 3){
  # Selective interactions: only the controlled x* variables interact;
  # the stat* variables enter as main effects only.
  controlled.vars = pca.vars[grep("^x",pca.vars)]
  stat.vars = pca.vars[grep("^stat",pca.vars)]
  
  if (InteractionMode >= 2){
    interaction.form = paste0('~(',paste0(controlled.vars, collapse ='+'),')^2')
  }
  if (InteractionMode >= 3){
    # overrides the two-way form when mode 3 is selected
    interaction.form = paste0('~(',paste0(controlled.vars, collapse ='+'),')^3')
  }
  no.interact.form = paste0(stat.vars, collapse ='+')
  
  pca.formula = as.formula(paste(interaction.form, no.interact.form, sep = "+"))
  pca.model = prcomp(pca.formula,data=data[,pca.vars],center=T,scale.=T,retx = T)
}

stopCluster(cl)
registerDoSEQ() # register sequential engine in case you are not using this function anymore
targetCumVar = .9  # keep the PCs that cumulatively explain up to 90% of variance

# Annotate the PCA model with variance bookkeeping used for PC selection.
pca.model$var = pca.model$sdev ^ 2 #eigenvalues
pca.model$pvar = pca.model$var / sum(pca.model$var)  # proportion of variance per PC
pca.model$cumpvar = cumsum(pca.model$pvar )          # cumulative proportion
pca.model$pcaSel = pca.model$cumpvar<=targetCumVar   # logical mask of selected PCs
pca.model$pcaSelCount = sum(pca.model$pcaSel)
pca.model$pcaSelTotVar = sum(pca.model$pvar[pca.model$pcaSel])
message(pca.model$pcaSelCount, " PCAs justify ",percent(targetCumVar)," of the total Variance. (",percent(pca.model$pcaSelTotVar),")")
## 164 PCAs justify 90.0% of the total Variance. (90.0%)
# Scree-style plots of per-PC and cumulative explained variance.
plot(pca.model$var,xlab="Principal component", ylab="Proportion of variance explained",   type='b')

plot(cumsum(pca.model$pvar ),xlab="Principal component", ylab="Cumulative Proportion of variance explained", ylim=c(0,1), type='b')

screeplot(pca.model,npcs = pca.model$pcaSelCount)

screeplot(pca.model,npcs = pca.model$pcaSelCount,type='lines')

#summary(pca.model)
#pca.model$rotation
# Modeling table in PCA space: label column(s) plus the selected PC scores.
data.pca = dplyr::select(data,!!label.names) %>% 
  dplyr::bind_cols(dplyr::select(as.data.frame(pca.model$x)
                                 ,!!colnames(pca.model$rotation)[pca.model$pcaSel])
  )

Train Test Split

# NOTE(review): there is no set.seed() before the shuffle/split, so the
# train/test partition differs between runs — confirm whether that is intended.
data.pca = data.pca[sample(nrow(data.pca)),] # randomly shuffle data
split = sample.split(data.pca[,label.names], SplitRatio = 0.8)  # caTools: 80% train

data.train = subset(data.pca, split == TRUE)
data.test = subset(data.pca, split == FALSE)

Common Functions

plot.diagnostics <-  function(model, train) {
  # Diagnostic plots for a fitted lm-style model: base diagnostics, studentized
  # and standardized residual plots, a residual histogram, leverage and Cook's
  # distance plots. Returns the vector of Cook's distances.
  #
  # model: fitted lm (or compatible) model
  # train: data the model was fitted on (used for predicted values)
  plot(model)
  
  r.standard = rstandard(model)  # internally studentized ("standardized") residuals
  r.student = rstudent(model)    # externally studentized residuals
  
  df = data.frame(x=predict(model,train),y=r.student)
  p=ggplot(data=df,aes(x=x,y=y)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_hline(yintercept = 0,size=1)+
    ylab("Student Residuals") +
    xlab("Predicted Values")+
    ggtitle("Student Residual Plot")
  plot(p)
  
  # Fix: this panel plots r.standard (with +/-2 reference lines), so it is
  # labeled as standardized; the original reused the studentized labels.
  df = data.frame(x=predict(model,train),y=r.standard)
  p=ggplot(data=df,aes(x=x,y=y)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_hline(yintercept = c(-2,0,2),size=1)+
    ylab("Standardized Residuals") +
    xlab("Predicted Values")+
    ggtitle("Standardized Residual Plot")
  plot(p)
  # Histogram of studentized residuals against the standard normal density
  df=data.frame(r.student)
  p=ggplot(data=df,aes(r.student)) +
    geom_histogram(aes(y=..density..),bins = 50,fill='blue',alpha=0.6) + 
    stat_function(fun = dnorm, n = 100, args = list(mean = 0, sd = 1)) +
    ylab("Density")+
    xlab("Studentized Residuals")+
    ggtitle("Distribution of Studentized Residuals")
  plot(p)
  # http://www.stat.columbia.edu/~martin/W2024/R7.pdf
  # Influential plots
  inf.meas = influence.measures(model)
  # print (summary(inf.meas)) # too much data
  
  # Leverage plot
  lev = hat(model.matrix(model))
  df=tibble::rownames_to_column(as.data.frame(lev),'id')
  p=ggplot(data=df,aes(x=as.numeric(id),y=lev)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    ylab('Leverage - check') + 
    xlab('Index')
  plot(p)
  # Cook's Distance: reference line at 4/n; only points above 15/n get labels
  cd = cooks.distance(model)
  df=tibble::rownames_to_column(as.data.frame(cd),'id')
  p=ggplot(data=df,aes(x=as.numeric(id),y=cd)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_text(data=filter(df,cd>15/nrow(train)),aes(label=id),check_overlap=T,size=3,vjust=-.5)+
    ylab('Cooks distances') + 
    geom_hline(yintercept = c(4/nrow(train),0),size=1)+
    xlab('Index')
  plot(p)
  print (paste("Number of data points that have Cook's D > 4/n: ", length(cd[cd > 4/nrow(train)]), sep = "")) 
  print (paste("Number of data points that have Cook's D > 1: ", length(cd[cd > 1]), sep = "")) 
  return(cd)
}

# function to set up random seeds
# Based on http://jaehyeon-kim.github.io/2015/05/Setup-Random-Seeds-on-Caret-Package.html 
setCaretSeeds <- function(method = "cv", numbers = 1, repeats = 1, tunes = NULL, seed = 1701) {
  # Build the `seeds` argument for caret::trainControl so resampling is
  # reproducible.
  #
  # method:  resampling method; "cv" or "repeatedcv" (anything else -> NULL)
  # numbers: number of folds
  # repeats: number of repeats (used by "repeatedcv" only)
  # tunes:   number of tuning parameter combinations (NULL -> 0)
  # seed:    master seed
  #
  # Returns a list of length B+1: B integer vectors of per-resample seeds plus
  # a final single seed for the last model fit; NULL for unsupported methods.
  
  # B is the number of resamples; each element holds M seeds (numbers + tune length, if any)
  B <- if (method == "cv") numbers
  else if(method == "repeatedcv") numbers * repeats
  else NULL
  # Bug fix: the original tested is.null(length) — always FALSE, because
  # `length` is the base function — so unsupported methods crashed in vector()
  # below instead of returning NULL.
  if(is.null(B)) {
    seeds <- NULL
  } else {
    set.seed(seed = seed)
    seeds <- vector(mode = "list", length = B)
    seeds <- lapply(seeds, function(x) sample.int(n = 1000000
                                                  , size = numbers + ifelse(is.null(tunes), 0, tunes)))
    seeds[[length(seeds) + 1]] <- sample.int(n = 1000000, size = 1)
  }
  # return seeds
  seeds
}



train.caret.glmselect = function(formula, data, method
                                 ,subopt = NULL, feature.names
                                 , train.control = NULL, tune.grid = NULL, pre.proc = NULL){
  # Train a regression model through caret and emit diagnostics. Supported:
  #   - 'leapForward' / 'leapBackward' / 'leapSeq' (regsubsets subset selection)
  #   - 'glmnet' with subopt == 'LASSO'
  #   - 'lars'
  # Returns a list (per-method) with model, id, residPlot, residHistogram,
  # metricsPlot; leap methods additionally return modelLM (refit lm).
  
  # Local helper: plot MAE/RMSE/Rsquared against the tuning parameter `xvar`.
  metricsPlotFor = function(results, xvar){
    dataPlot = results %>%
      gather(key='metric',value='value',-!!sym(xvar)) %>%
      dplyr::filter(metric %in% c('MAE','RMSE','Rsquared'))
    p = ggplot(data=dataPlot,aes_string(x=xvar,y='value')) +
      geom_line(color='lightblue4') +
      geom_point(color='blue',alpha=0.7,size=.9) +
      facet_wrap(~metric,ncol=2,scales='free_y')+
      theme_light()
    plot(p)
    p
  }
  
  # Local helper: residuals-vs-predicted scatter and residual histogram.
  residPlotsFor = function(model.caret, data){
    dataPlot=data.frame(pred=predict(model.caret,data),res=resid(model.caret))
    residPlot = ggplot(dataPlot,aes(x=pred,y=res)) +
      geom_point(color='light blue',alpha=0.7) +
      geom_smooth(method="lm")+
      theme_light()
    plot(residPlot)
    residHistogram = ggplot(dataPlot,aes(x=res)) +
      geom_histogram(aes(y=..density..),fill='light blue',alpha=1) +
      stat_function(fun = dnorm, n = 100, args = list(mean = mean(dataPlot$res)
                                                      , sd = sd(dataPlot$res)),color='lightblue4') +
      # Bug fix: the original dropped the '+' before theme_light(), so the
      # theme was evaluated and silently discarded (in all three branches)
      theme_light()
    plot(residHistogram)
    list(residPlot = residPlot, residHistogram = residHistogram)
  }
  
  if(is.null(train.control)){
    train.control <- trainControl(method = "cv"
                              ,number = 10
                              ,seeds = setCaretSeeds(method = "cv"
                                                     , numbers = 10
                                                     , seed = 1701)
                              ,search = "grid"
                              ,verboseIter = TRUE
                              ,allowParallel = TRUE
                              )
  }
  
  if(is.null(tune.grid)){
    if (method %in% c('leapForward','leapBackward','leapSeq')){
      tune.grid = data.frame(nvmax = 1:length(feature.names))
    }
    # isTRUE() keeps the comparison safe when subopt is NULL
    if (method == 'glmnet' && isTRUE(subopt == 'LASSO')){
      # Will only show 1 Lambda value during training, but that is OK
      # https://stackoverflow.com/questions/47526544/why-need-to-tune-lambda-with-carettrain-method-glmnet-and-cv-glmnet
      # Another option for LASSO is this: https://github.com/topepo/caret/blob/master/RegressionTests/Code/lasso.R
      lambda = 10^seq(-2,0, length =100)
      alpha = c(1)
      tune.grid = expand.grid(alpha = alpha,lambda = lambda)
    }
    if (method == 'lars'){
      # https://github.com/topepo/caret/blob/master/RegressionTests/Code/lars.R
      fraction = seq(0, 1, length = 100)
      tune.grid = expand.grid(fraction = fraction)
      pre.proc = c("center", "scale") 
    }
  }
  
  # http://sshaikh.org/2015/05/06/parallelize-machine-learning-in-r-with-multi-core-cpus/
  # use 75% of cores only, leave rest for other tasks; floor() + max() guard
  # against a fractional or zero worker count
  cl <- makeCluster(max(1, floor(detectCores() * 0.75)))
  registerDoParallel(cl)

  set.seed(1) 
  # note that the seed has to actually be set just before this function is called;
  # setting it above does not ensure reproducibility for some reason
  model.caret <- caret::train(formula
                              , data = data
                              , method = method
                              , tuneGrid = tune.grid
                              , trControl = train.control
                              , preProc = pre.proc
                              )
  
  stopCluster(cl)
  registerDoSEQ() # register sequential engine in case you are not using this function anymore
  
  if (method %in% c('leapForward','leapBackward','leapSeq')){
    print("All models results")
    print(model.caret$results) # all model results
    print("Best Model")
    print(model.caret$bestTune) # best model
    model = model.caret$finalModel

    metricsPlot = metricsPlotFor(model.caret$results, 'nvmax')
    # the leap functions do not support studentized residuals
    resids = residPlotsFor(model.caret, data)

    id = rownames(model.caret$bestTune)    
    # regsubsets does not return a full model (see its documentation), so the
    # selected variables are refit with lm() to obtain coefficient intervals
    # https://stackoverflow.com/questions/13063762/how-to-obtain-a-lm-object-from-regsubsets
    print("Coefficients of final model:")
    coefs <- coef(model, id=id)
    nams <- names(coefs)
    nams <- nams[!nams %in% "(Intercept)"]
    response <-  as.character(formula[[2]])
    form <- as.formula(paste(response, paste(nams, collapse = " + "), sep = " ~ "))
    mod <- lm(form, data = data)
    print(car::Confint(mod))
    return(list(model = model,id = id, residPlot = resids$residPlot
                ,residHistogram = resids$residHistogram
                ,metricsPlot = metricsPlot   # added for consistency with the other branches
                ,modelLM = mod))
  }
  if (method == 'glmnet' && isTRUE(subopt == 'LASSO')){
    print(model.caret)
    print(plot(model.caret))
    print(model.caret$bestTune)
    
    print(model.caret$results)
    model=model.caret$finalModel
    metricsPlot = metricsPlotFor(model.caret$results, 'lambda')
    resids = residPlotsFor(model.caret, data)
    
    print("Coefficients") 
    #no interval for glmnet: https://stackoverflow.com/questions/39750965/confidence-intervals-for-ridge-regression
    t=coef(model,s=model.caret$bestTune$lambda)
    model.coef = t[which(t[,1]!=0),]
    print(as.data.frame(model.coef))
    id = NULL # not really needed but added for consistency
    return(list(model = model.caret,id = id, residPlot = resids$residPlot
                ,residHistogram = resids$residHistogram  # added for consistency
                ,metricsPlot = metricsPlot))
  }
  if (method == 'lars'){
    print(model.caret)
    print(plot(model.caret))
    print(model.caret$bestTune)
    
    metricsPlot = metricsPlotFor(model.caret$results, 'fraction')
    resids = residPlotsFor(model.caret, data)
    
    print("Coefficients") 
    t=coef(model.caret$finalModel,s=model.caret$bestTune$fraction,mode='fraction')
    model.coef = t[which(t!=0)]
    print(model.coef)
    id = NULL # not really needed but added for consistency
    return(list(model = model.caret,id = id, residPlot = resids$residPlot
                ,residHistogram = resids$residHistogram
                ,metricsPlot = metricsPlot))  # added for consistency
  }
}

# https://stackoverflow.com/questions/48265743/linear-model-subset-selection-goodness-of-fit-with-k-fold-cross-validation
# changed slightly since call[[2]] was just returning "formula" without actually returning the value in formula
predict.regsubsets <- function(object, newdata, id, formula, ...) {
  # Predict from a regsubsets fit at step `id`: build the design matrix from
  # `formula` (adds the intercept and expands interaction terms), keep only the
  # columns selected at that step, and multiply by their coefficients.
  design <- model.matrix(formula, newdata)
  betas <- coef(object, id = id)
  design[, names(betas)] %*% betas
}
  
test.model = function(model, test, level=0.95
                      ,draw.limits = FALSE, good = 0.1, ok = 0.15
                      ,method = NULL, subopt = NULL
                      ,id = NULL, formula, feature.names, label.names
                      ,transformation = NULL){
  # Evaluate a fitted model on the test set: prints MSE/RMSE on the transformed
  # and original scales, and returns an actual-vs-predicted ggplot.
  # Relies on the globals log.pred / norm.pred to undo the label transformation.
  ## if using caret for glm select equivalent functionality, 
  ## need to pass formula (full is ok as it will select subset of variables from there)
  # Bug fix: the method dispatch is now an else-if chain; the original fell
  # through to `method == '...'` comparisons, which error with
  # "argument is of length zero" when method is NULL.
  if (is.null(method)){
    pred = predict(model, newdata=test, interval="confidence", level = level) 
  } else if (method %in% c('leapForward','leapBackward','leapSeq')){
    pred = predict.regsubsets(model, newdata = test, id = id, formula = formula)
  } else if (method == 'glmnet' && isTRUE(subopt == 'LASSO')){
    xtest = as.matrix(test[,feature.names]) 
    pred=as.data.frame(predict(model, xtest))
  } else if (method == 'lars'){
    pred=as.data.frame(predict(model, newdata = test))
  }
    
  # Summary of predicted values
  print ("Summary of predicted values: ")
  print(summary(pred[,1]))

  # Error metrics on the (possibly transformed) modeling scale
  test.mse = mean((test[,label.names]-pred[,1])^2)
  print (paste(method, subopt, "Test MSE:", test.mse, sep=" "))
  
  test.rmse = sqrt(test.mse)
  print (paste(method, subopt, "Test RMSE:", test.rmse, sep=" "))
  
  if(log.pred == TRUE || norm.pred == TRUE){
    # plot the transformed-scale comparison first.
    # Bug fix: print() is required — ggplot objects are not auto-printed
    # inside a function, so this plot was silently dropped before.
    df=data.frame(x=test[,label.names],y=pred[,1])
    print(
      ggplot(df,aes(x=x,y=y)) +
        geom_point(color='blue',alpha=0.5,shape=20,size=2) +
        geom_abline(slope=1,intercept=0,color='black',size=1) +
        #scale_y_continuous(limits=c(min(df),max(df)))+
        xlab("Actual (Transformed)")+
        ylab("Predicted (Transformed)")
    )
  }
    
  # Undo the label transformation to report on the original scale
  if (log.pred == FALSE && norm.pred == FALSE){
    x = test[,label.names]
    y = pred[,1]
  }
  if (log.pred == TRUE){
    x = 10^test[,label.names]
    y = 10^pred[,1]
    # x = (test[,label.names])^3
    # y = (pred[,1])^3
  }
  if (norm.pred == TRUE){
    x = predict(transformation, test[,label.names], inverse = TRUE)
    y = predict(transformation, pred[,1], inverse = TRUE)
  }

  test.mse = mean((x-y)^2)
  print (paste(method, subopt, "Test MSE (Org Scale):", test.mse, sep=" "))
  
  test.rmse = sqrt(test.mse)
  print (paste(method, subopt, "Test RMSE (Org Scale):", test.rmse, sep=" "))

  # Actual vs predicted with +/- `good` (green) and +/- `ok` (red) tolerance
  # lines; returned, so it auto-prints when called at top level.
  df=data.frame(x,y)
  ggplot(df,aes(x,y)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_abline(slope=c(1+good,1-good,1+ok,1-ok)
                ,intercept=rep(0,4),color=c('dark green','dark green','dark red','dark red'),size=1,alpha=0.8) +
    #scale_y_continuous(limits=c(min(df),max(df)))+
    xlab("Actual")+
    ylab("Predicted")
}

Setup Formulae

# All column names of the training frame (labels + features)
n <- names(data.train)

# Full model formula: every label column ~ every non-label (feature) column
formula <- as.formula(
  paste(paste(n[n %in% label.names], collapse = " + "),
        "~",
        paste(n[!n %in% label.names], collapse = " + ")))

# Intercept-only baseline formula: labels ~ 1
grand.mean.formula = as.formula(
  paste(paste(n[n %in% label.names], collapse = " + "), "~ 1"))

print(formula)
## y3.log ~ PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + 
##     PC10 + PC11 + PC12 + PC13 + PC14 + PC15 + PC16 + PC17 + PC18 + 
##     PC19 + PC20 + PC21 + PC22 + PC23 + PC24 + PC25 + PC26 + PC27 + 
##     PC28 + PC29 + PC30 + PC31 + PC32 + PC33 + PC34 + PC35 + PC36 + 
##     PC37 + PC38 + PC39 + PC40 + PC41 + PC42 + PC43 + PC44 + PC45 + 
##     PC46 + PC47 + PC48 + PC49 + PC50 + PC51 + PC52 + PC53 + PC54 + 
##     PC55 + PC56 + PC57 + PC58 + PC59 + PC60 + PC61 + PC62 + PC63 + 
##     PC64 + PC65 + PC66 + PC67 + PC68 + PC69 + PC70 + PC71 + PC72 + 
##     PC73 + PC74 + PC75 + PC76 + PC77 + PC78 + PC79 + PC80 + PC81 + 
##     PC82 + PC83 + PC84 + PC85 + PC86 + PC87 + PC88 + PC89 + PC90 + 
##     PC91 + PC92 + PC93 + PC94 + PC95 + PC96 + PC97 + PC98 + PC99 + 
##     PC100 + PC101 + PC102 + PC103 + PC104 + PC105 + PC106 + PC107 + 
##     PC108 + PC109 + PC110 + PC111 + PC112 + PC113 + PC114 + PC115 + 
##     PC116 + PC117 + PC118 + PC119 + PC120 + PC121 + PC122 + PC123 + 
##     PC124 + PC125 + PC126 + PC127 + PC128 + PC129 + PC130 + PC131 + 
##     PC132 + PC133 + PC134 + PC135 + PC136 + PC137 + PC138 + PC139 + 
##     PC140 + PC141 + PC142 + PC143 + PC144 + PC145 + PC146 + PC147 + 
##     PC148 + PC149 + PC150 + PC151 + PC152 + PC153 + PC154 + PC155 + 
##     PC156 + PC157 + PC158 + PC159 + PC160 + PC161 + PC162 + PC163 + 
##     PC164
print(grand.mean.formula)
## y3.log ~ 1
# Recompute feature.names from the training-frame columns: upstream steps may
# have transformed/renamed features, so take every non-label column of `n`.
feature.names = n[!n %in% label.names]

Full Model

# Full OLS fit: every principal component as a predictor
model.full = lm(formula, data = data.train)
summary(model.full)
## 
## Call:
## lm(formula = formula, data = data.train)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.084858 -0.022051 -0.005534  0.016797  0.183125 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)  2.097e+00  4.231e-04 4955.559  < 2e-16 ***
## PC1         -4.836e-04  3.691e-05  -13.100  < 2e-16 ***
## PC2         -9.448e-04  3.746e-05  -25.218  < 2e-16 ***
## PC3         -4.371e-04  3.770e-05  -11.595  < 2e-16 ***
## PC4         -3.491e-04  3.820e-05   -9.140  < 2e-16 ***
## PC5          2.325e-04  3.938e-05    5.904 3.76e-09 ***
## PC6         -9.929e-05  3.936e-05   -2.522 0.011685 *  
## PC7         -2.033e-04  4.018e-05   -5.061 4.31e-07 ***
## PC8         -3.725e-05  4.115e-05   -0.905 0.365319    
## PC9         -5.154e-05  4.212e-05   -1.224 0.221164    
## PC10        -3.929e-06  4.285e-05   -0.092 0.926936    
## PC11        -5.386e-04  4.566e-05  -11.796  < 2e-16 ***
## PC12        -5.038e-04  4.810e-05  -10.475  < 2e-16 ***
## PC13         3.436e-04  4.902e-05    7.009 2.70e-12 ***
## PC14         2.536e-04  5.077e-05    4.995 6.08e-07 ***
## PC15        -3.593e-05  5.159e-05   -0.696 0.486188    
## PC16         3.542e-04  5.229e-05    6.775 1.38e-11 ***
## PC17        -2.002e-04  5.519e-05   -3.628 0.000289 ***
## PC18        -3.652e-04  5.740e-05   -6.362 2.15e-10 ***
## PC19         4.371e-05  5.810e-05    0.752 0.451900    
## PC20         4.121e-04  6.354e-05    6.485 9.64e-11 ***
## PC21         8.386e-05  6.633e-05    1.264 0.206192    
## PC22         9.425e-05  1.036e-04    0.910 0.362771    
## PC23         2.060e-04  1.278e-04    1.612 0.107072    
## PC24        -7.978e-04  1.489e-04   -5.358 8.76e-08 ***
## PC25         2.484e-04  1.685e-04    1.475 0.140375    
## PC26         3.859e-04  1.727e-04    2.235 0.025489 *  
## PC27         2.691e-04  1.719e-04    1.565 0.117537    
## PC28         3.567e-05  1.757e-04    0.203 0.839129    
## PC29         3.666e-04  1.917e-04    1.912 0.055931 .  
## PC30        -7.683e-05  1.976e-04   -0.389 0.697371    
## PC31        -5.625e-05  2.119e-04   -0.265 0.790692    
## PC32        -7.103e-04  2.131e-04   -3.333 0.000866 ***
## PC33         7.017e-04  2.176e-04    3.224 0.001270 ** 
## PC34         1.104e-03  2.302e-04    4.795 1.67e-06 ***
## PC35         7.582e-05  2.455e-04    0.309 0.757414    
## PC36         2.574e-05  2.473e-04    0.104 0.917101    
## PC37        -3.663e-04  2.568e-04   -1.427 0.153755    
## PC38         1.998e-04  2.654e-04    0.753 0.451677    
## PC39        -2.058e-04  2.738e-04   -0.751 0.452406    
## PC40        -8.637e-05  2.737e-04   -0.316 0.752317    
## PC41        -1.524e-04  2.836e-04   -0.537 0.591066    
## PC42        -2.202e-04  2.894e-04   -0.761 0.446639    
## PC43         1.337e-05  2.887e-04    0.046 0.963070    
## PC44         6.412e-04  2.907e-04    2.206 0.027444 *  
## PC45        -2.900e-04  2.920e-04   -0.993 0.320687    
## PC46         1.042e-04  2.918e-04    0.357 0.721170    
## PC47        -4.826e-04  2.925e-04   -1.650 0.099030 .  
## PC48         1.024e-04  2.956e-04    0.346 0.729105    
## PC49         3.405e-04  2.984e-04    1.141 0.253751    
## PC50         7.322e-06  3.012e-04    0.024 0.980608    
## PC51        -4.159e-05  3.023e-04   -0.138 0.890557    
## PC52        -5.405e-05  3.040e-04   -0.178 0.858906    
## PC53         1.362e-04  3.031e-04    0.449 0.653287    
## PC54        -4.487e-05  3.087e-04   -0.145 0.884432    
## PC55        -4.854e-05  3.126e-04   -0.155 0.876624    
## PC56        -1.266e-05  3.123e-04   -0.041 0.967672    
## PC57        -7.543e-04  3.146e-04   -2.398 0.016537 *  
## PC58        -1.222e-04  3.146e-04   -0.388 0.697747    
## PC59         9.754e-04  3.131e-04    3.115 0.001847 ** 
## PC60        -8.101e-05  3.170e-04   -0.256 0.798317    
## PC61         1.282e-04  3.210e-04    0.399 0.689672    
## PC62        -3.748e-04  3.231e-04   -1.160 0.246157    
## PC63        -6.974e-04  3.213e-04   -2.171 0.030008 *  
## PC64        -9.024e-04  3.249e-04   -2.778 0.005493 ** 
## PC65        -3.464e-05  3.276e-04   -0.106 0.915811    
## PC66        -4.220e-04  3.283e-04   -1.286 0.198642    
## PC67         2.344e-04  3.271e-04    0.717 0.473691    
## PC68         4.975e-04  3.298e-04    1.508 0.131490    
## PC69         9.419e-05  3.297e-04    0.286 0.775129    
## PC70         1.949e-04  3.312e-04    0.588 0.556264    
## PC71         5.198e-04  3.331e-04    1.560 0.118741    
## PC72         2.656e-05  3.325e-04    0.080 0.936334    
## PC73         4.745e-04  3.353e-04    1.415 0.157123    
## PC74        -6.569e-04  3.375e-04   -1.946 0.051682 .  
## PC75        -8.762e-04  3.404e-04   -2.574 0.010080 *  
## PC76         6.320e-06  3.384e-04    0.019 0.985100    
## PC77         4.872e-04  3.390e-04    1.437 0.150734    
## PC78         2.820e-04  3.408e-04    0.828 0.407922    
## PC79         5.576e-04  3.441e-04    1.620 0.105196    
## PC80        -1.334e-04  3.483e-04   -0.383 0.701816    
## PC81         7.199e-04  3.446e-04    2.089 0.036750 *  
## PC82         4.346e-04  3.524e-04    1.233 0.217601    
## PC83        -7.136e-04  3.506e-04   -2.035 0.041852 *  
## PC84         8.048e-04  3.517e-04    2.288 0.022157 *  
## PC85         1.115e-03  3.563e-04    3.130 0.001758 ** 
## PC86        -9.678e-05  3.535e-04   -0.274 0.784252    
## PC87         1.713e-03  3.537e-04    4.843 1.32e-06 ***
## PC88        -1.142e-03  3.603e-04   -3.170 0.001531 ** 
## PC89        -5.352e-04  3.592e-04   -1.490 0.136308    
## PC90        -5.145e-04  3.599e-04   -1.430 0.152905    
## PC91         6.712e-06  3.597e-04    0.019 0.985114    
## PC92         2.730e-04  3.569e-04    0.765 0.444363    
## PC93         8.076e-05  3.609e-04    0.224 0.822961    
## PC94        -9.231e-04  3.633e-04   -2.541 0.011088 *  
## PC95         1.072e-04  3.593e-04    0.298 0.765347    
## PC96        -4.540e-04  3.669e-04   -1.238 0.215948    
## PC97        -5.048e-04  3.644e-04   -1.385 0.166058    
## PC98        -4.852e-04  3.638e-04   -1.334 0.182364    
## PC99        -4.368e-04  3.650e-04   -1.197 0.231481    
## PC100       -5.241e-05  3.654e-04   -0.143 0.885948    
## PC101       -1.831e-04  3.669e-04   -0.499 0.617851    
## PC102       -5.931e-04  3.658e-04   -1.621 0.105010    
## PC103        1.642e-04  3.687e-04    0.445 0.656090    
## PC104       -6.728e-04  3.692e-04   -1.823 0.068420 .  
## PC105        4.885e-04  3.706e-04    1.318 0.187487    
## PC106        1.240e-03  3.716e-04    3.336 0.000856 ***
## PC107        6.011e-04  3.722e-04    1.615 0.106331    
## PC108        7.268e-05  3.716e-04    0.196 0.844939    
## PC109        5.346e-04  3.721e-04    1.437 0.150860    
## PC110       -5.618e-04  3.744e-04   -1.500 0.133597    
## PC111       -8.212e-04  3.762e-04   -2.183 0.029067 *  
## PC112       -1.273e-04  3.752e-04   -0.339 0.734345    
## PC113        3.000e-04  3.772e-04    0.795 0.426421    
## PC114       -7.443e-04  3.734e-04   -1.994 0.046256 *  
## PC115       -1.656e-03  3.763e-04   -4.400 1.10e-05 ***
## PC116       -1.029e-04  3.796e-04   -0.271 0.786422    
## PC117        4.354e-05  3.766e-04    0.116 0.907971    
## PC118        7.136e-04  3.792e-04    1.882 0.059899 .  
## PC119       -5.390e-04  3.793e-04   -1.421 0.155334    
## PC120        2.316e-04  3.777e-04    0.613 0.539853    
## PC121       -4.007e-04  3.807e-04   -1.052 0.292664    
## PC122        4.944e-04  3.784e-04    1.307 0.191340    
## PC123       -5.427e-04  3.819e-04   -1.421 0.155336    
## PC124        2.045e-04  3.795e-04    0.539 0.590077    
## PC125        4.804e-04  3.826e-04    1.256 0.209334    
## PC126        5.539e-05  3.792e-04    0.146 0.883859    
## PC127        8.249e-05  3.831e-04    0.215 0.829529    
## PC128       -1.001e-03  3.825e-04   -2.618 0.008862 ** 
## PC129        6.049e-05  3.845e-04    0.157 0.875019    
## PC130        4.180e-04  3.826e-04    1.092 0.274734    
## PC131       -1.473e-03  3.820e-04   -3.855 0.000117 ***
## PC132        3.136e-04  3.862e-04    0.812 0.416845    
## PC133       -1.986e-04  3.855e-04   -0.515 0.606485    
## PC134        9.860e-04  3.854e-04    2.558 0.010551 *  
## PC135        4.438e-04  3.851e-04    1.152 0.249261    
## PC136        5.524e-04  3.881e-04    1.423 0.154713    
## PC137       -7.427e-04  3.875e-04   -1.917 0.055339 .  
## PC138        5.633e-04  3.893e-04    1.447 0.147996    
## PC139       -7.444e-04  3.870e-04   -1.923 0.054476 .  
## PC140       -3.928e-04  3.885e-04   -1.011 0.311962    
## PC141        3.842e-04  3.866e-04    0.994 0.320412    
## PC142        6.662e-05  3.890e-04    0.171 0.864009    
## PC143        3.135e-04  3.887e-04    0.806 0.420014    
## PC144        1.028e-03  3.901e-04    2.635 0.008443 ** 
## PC145        1.961e-04  3.900e-04    0.503 0.615043    
## PC146        5.962e-04  3.937e-04    1.514 0.129983    
## PC147       -2.624e-04  3.916e-04   -0.670 0.502811    
## PC148       -5.040e-04  3.911e-04   -1.289 0.197540    
## PC149        1.457e-04  3.925e-04    0.371 0.710541    
## PC150        6.544e-05  3.936e-04    0.166 0.867974    
## PC151        6.700e-04  3.936e-04    1.702 0.088822 .  
## PC152       -6.890e-04  3.934e-04   -1.751 0.079947 .  
## PC153        4.644e-04  3.968e-04    1.170 0.241953    
## PC154       -8.551e-04  3.946e-04   -2.167 0.030272 *  
## PC155        1.077e-03  3.955e-04    2.722 0.006505 ** 
## PC156        1.360e-03  3.960e-04    3.433 0.000602 ***
## PC157       -2.644e-04  3.943e-04   -0.671 0.502549    
## PC158        2.302e-05  3.979e-04    0.058 0.953875    
## PC159        2.127e-03  3.983e-04    5.340 9.69e-08 ***
## PC160        2.107e-04  3.997e-04    0.527 0.598154    
## PC161        3.417e-04  3.968e-04    0.861 0.389252    
## PC162       -1.143e-03  4.001e-04   -2.857 0.004295 ** 
## PC163        6.759e-04  3.976e-04    1.700 0.089230 .  
## PC164        2.856e-04  4.011e-04    0.712 0.476382    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03152 on 5419 degrees of freedom
## Multiple R-squared:  0.2723, Adjusted R-squared:  0.2503 
## F-statistic: 12.36 on 164 and 5419 DF,  p-value: < 2.2e-16
# Residual diagnostics; returns per-observation Cook's distances (used below
# to flag high-influence points). plot.diagnostics is defined earlier in file.
cd.full = plot.diagnostics(model=model.full, train=data.train)

## [1] "Number of data points that have Cook's D > 4/n: 268"
## [1] "Number of data points that have Cook's D > 1: 0"

Checking with removal of high influence points

# Rule of thumb: observations with Cook's distance > 4/n are high influence
high.cd = names(cd.full[cd.full > 4/nrow(data.train)])

# Save dataset with high.cd flagged.
# Renamed from `t` to avoid masking base::t (matrix transpose).
flagged.data = data.train %>% 
  rownames_to_column() %>%
  mutate(high.cd = ifelse(rowname %in% high.cd,1,0))
#write.csv(flagged.data,file='data_high_cd_flag.csv',row.names = F)
###
# Refit the full model with the high-influence rows dropped.
# Parentheses made explicit: the original `!(rownames(data.train)) %in% ...`
# only worked because %in% binds tighter than `!`.
data.train2 = data.train[!(rownames(data.train) %in% high.cd),]
model.full2 = lm(formula , data.train2)
summary(model.full2)
## 
## Call:
## lm(formula = formula, data = data.train2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.05672 -0.01919 -0.00363  0.01675  0.08122 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)  2.093e+00  3.531e-04 5928.987  < 2e-16 ***
## PC1         -5.034e-04  3.140e-05  -16.030  < 2e-16 ***
## PC2         -9.257e-04  3.131e-05  -29.568  < 2e-16 ***
## PC3         -4.501e-04  3.167e-05  -14.211  < 2e-16 ***
## PC4         -3.651e-04  3.191e-05  -11.443  < 2e-16 ***
## PC5          2.259e-04  3.314e-05    6.816 1.04e-11 ***
## PC6         -7.324e-05  3.293e-05   -2.224 0.026187 *  
## PC7         -2.212e-04  3.361e-05   -6.581 5.12e-11 ***
## PC8         -3.088e-05  3.449e-05   -0.895 0.370608    
## PC9         -7.797e-06  3.523e-05   -0.221 0.824864    
## PC10         7.869e-06  3.591e-05    0.219 0.826579    
## PC11        -6.081e-04  3.812e-05  -15.950  < 2e-16 ***
## PC12        -5.098e-04  4.010e-05  -12.714  < 2e-16 ***
## PC13         3.338e-04  4.097e-05    8.149 4.57e-16 ***
## PC14         2.368e-04  4.226e-05    5.605 2.19e-08 ***
## PC15        -5.593e-05  4.314e-05   -1.296 0.194885    
## PC16         3.220e-04  4.363e-05    7.380 1.84e-13 ***
## PC17        -2.158e-04  4.599e-05   -4.691 2.79e-06 ***
## PC18        -3.629e-04  4.789e-05   -7.577 4.15e-14 ***
## PC19         7.338e-05  4.852e-05    1.513 0.130449    
## PC20         4.288e-04  5.301e-05    8.090 7.38e-16 ***
## PC21         9.219e-05  5.537e-05    1.665 0.095973 .  
## PC22         1.381e-04  8.630e-05    1.600 0.109588    
## PC23         2.000e-04  1.082e-04    1.848 0.064598 .  
## PC24        -8.489e-04  1.246e-04   -6.812 1.07e-11 ***
## PC25         3.575e-04  1.417e-04    2.523 0.011669 *  
## PC26         3.303e-04  1.444e-04    2.287 0.022250 *  
## PC27         1.512e-04  1.445e-04    1.046 0.295399    
## PC28         1.902e-05  1.477e-04    0.129 0.897552    
## PC29         4.153e-04  1.601e-04    2.595 0.009498 ** 
## PC30        -5.140e-05  1.662e-04   -0.309 0.757196    
## PC31        -1.370e-04  1.786e-04   -0.767 0.443107    
## PC32        -6.672e-04  1.785e-04   -3.739 0.000187 ***
## PC33         2.763e-04  1.840e-04    1.502 0.133266    
## PC34         1.059e-03  1.922e-04    5.511 3.75e-08 ***
## PC35         2.186e-04  2.076e-04    1.053 0.292483    
## PC36        -6.446e-05  2.078e-04   -0.310 0.756452    
## PC37        -5.049e-04  2.149e-04   -2.350 0.018810 *  
## PC38         1.114e-04  2.220e-04    0.502 0.615826    
## PC39        -1.252e-04  2.388e-04   -0.524 0.600011    
## PC40        -1.215e-04  2.316e-04   -0.525 0.599779    
## PC41        -3.360e-04  2.397e-04   -1.402 0.160972    
## PC42         1.117e-04  2.438e-04    0.458 0.646730    
## PC43         2.907e-04  2.436e-04    1.194 0.232721    
## PC44         3.989e-04  2.486e-04    1.605 0.108606    
## PC45        -1.190e-04  2.453e-04   -0.485 0.627501    
## PC46         1.418e-04  2.466e-04    0.575 0.565425    
## PC47        -4.521e-04  2.461e-04   -1.837 0.066253 .  
## PC48         1.111e-04  2.488e-04    0.447 0.655212    
## PC49         4.302e-04  2.515e-04    1.710 0.087267 .  
## PC50        -1.990e-04  2.545e-04   -0.782 0.434306    
## PC51         1.372e-04  2.567e-04    0.535 0.592990    
## PC52        -2.241e-04  2.564e-04   -0.874 0.382227    
## PC53         1.746e-04  2.557e-04    0.683 0.494703    
## PC54        -3.477e-04  2.627e-04   -1.324 0.185712    
## PC55        -4.148e-04  2.638e-04   -1.573 0.115894    
## PC56         1.240e-04  2.660e-04    0.466 0.640965    
## PC57        -7.998e-04  2.657e-04   -3.010 0.002625 ** 
## PC58        -4.297e-04  2.671e-04   -1.609 0.107752    
## PC59         1.015e-03  2.661e-04    3.813 0.000139 ***
## PC60        -3.669e-04  2.687e-04   -1.366 0.172155    
## PC61        -2.166e-05  2.694e-04   -0.080 0.935928    
## PC62        -1.893e-04  2.724e-04   -0.695 0.487022    
## PC63        -5.771e-04  2.699e-04   -2.138 0.032562 *  
## PC64        -7.240e-04  2.733e-04   -2.649 0.008094 ** 
## PC65        -1.539e-04  2.756e-04   -0.558 0.576533    
## PC66        -1.636e-04  2.781e-04   -0.588 0.556342    
## PC67         1.505e-04  2.781e-04    0.541 0.588459    
## PC68         6.390e-04  2.789e-04    2.291 0.021990 *  
## PC69         3.767e-04  2.781e-04    1.354 0.175687    
## PC70         3.950e-04  2.766e-04    1.428 0.153317    
## PC71         3.641e-04  2.791e-04    1.304 0.192177    
## PC72        -1.055e-04  2.797e-04   -0.377 0.706032    
## PC73         4.437e-04  2.800e-04    1.585 0.113072    
## PC74        -2.907e-04  2.848e-04   -1.021 0.307478    
## PC75        -6.510e-04  2.870e-04   -2.268 0.023341 *  
## PC76        -2.030e-04  2.835e-04   -0.716 0.473992    
## PC77         3.497e-04  2.840e-04    1.231 0.218286    
## PC78        -6.564e-05  2.866e-04   -0.229 0.818839    
## PC79         7.159e-04  2.891e-04    2.476 0.013315 *  
## PC80        -1.919e-04  2.907e-04   -0.660 0.509199    
## PC81         8.874e-04  2.882e-04    3.080 0.002084 ** 
## PC82         3.492e-04  2.954e-04    1.182 0.237253    
## PC83        -6.782e-04  2.963e-04   -2.289 0.022115 *  
## PC84         7.847e-04  2.961e-04    2.650 0.008068 ** 
## PC85         1.380e-03  3.013e-04    4.581 4.75e-06 ***
## PC86         9.588e-05  2.964e-04    0.324 0.746319    
## PC87         1.481e-03  2.955e-04    5.013 5.55e-07 ***
## PC88        -9.719e-04  3.015e-04   -3.223 0.001276 ** 
## PC89        -3.580e-04  3.014e-04   -1.188 0.235042    
## PC90        -3.864e-04  3.019e-04   -1.280 0.200633    
## PC91        -1.221e-04  2.998e-04   -0.407 0.683912    
## PC92         4.982e-04  2.978e-04    1.673 0.094410 .  
## PC93        -2.352e-04  3.036e-04   -0.775 0.438562    
## PC94        -7.713e-04  3.045e-04   -2.533 0.011338 *  
## PC95         2.487e-04  3.029e-04    0.821 0.411519    
## PC96        -2.991e-04  3.071e-04   -0.974 0.330041    
## PC97        -3.630e-04  3.045e-04   -1.192 0.233365    
## PC98        -5.363e-04  3.041e-04   -1.764 0.077835 .  
## PC99        -1.667e-04  3.047e-04   -0.547 0.584256    
## PC100       -4.433e-05  3.048e-04   -0.145 0.884353    
## PC101       -3.681e-04  3.069e-04   -1.199 0.230501    
## PC102       -4.342e-04  3.055e-04   -1.421 0.155281    
## PC103       -9.655e-05  3.088e-04   -0.313 0.754536    
## PC104       -6.635e-04  3.073e-04   -2.159 0.030875 *  
## PC105        6.801e-04  3.098e-04    2.195 0.028181 *  
## PC106        1.154e-03  3.094e-04    3.729 0.000194 ***
## PC107        6.590e-04  3.127e-04    2.108 0.035102 *  
## PC108        1.136e-05  3.102e-04    0.037 0.970776    
## PC109        4.196e-04  3.115e-04    1.347 0.178116    
## PC110       -5.413e-04  3.133e-04   -1.727 0.084144 .  
## PC111       -9.389e-04  3.147e-04   -2.984 0.002861 ** 
## PC112       -8.764e-05  3.134e-04   -0.280 0.779752    
## PC113        1.350e-04  3.150e-04    0.429 0.668236    
## PC114       -8.034e-04  3.123e-04   -2.573 0.010123 *  
## PC115       -1.890e-03  3.146e-04   -6.010 1.99e-09 ***
## PC116       -9.974e-07  3.173e-04   -0.003 0.997492    
## PC117        1.906e-04  3.148e-04    0.605 0.544903    
## PC118        4.988e-04  3.163e-04    1.577 0.114890    
## PC119       -3.598e-04  3.170e-04   -1.135 0.256510    
## PC120        1.076e-04  3.164e-04    0.340 0.733921    
## PC121       -6.644e-04  3.173e-04   -2.094 0.036317 *  
## PC122        3.526e-04  3.154e-04    1.118 0.263674    
## PC123       -5.212e-04  3.196e-04   -1.631 0.102958    
## PC124       -2.618e-04  3.177e-04   -0.824 0.409991    
## PC125        6.122e-04  3.197e-04    1.915 0.055558 .  
## PC126        3.150e-05  3.167e-04    0.099 0.920752    
## PC127       -1.166e-04  3.195e-04   -0.365 0.715173    
## PC128       -9.783e-04  3.193e-04   -3.064 0.002193 ** 
## PC129       -2.623e-04  3.233e-04   -0.811 0.417187    
## PC130        3.091e-04  3.193e-04    0.968 0.332960    
## PC131       -8.760e-04  3.194e-04   -2.743 0.006115 ** 
## PC132        2.770e-04  3.227e-04    0.858 0.390718    
## PC133       -2.061e-04  3.238e-04   -0.636 0.524486    
## PC134        7.610e-04  3.222e-04    2.362 0.018219 *  
## PC135        1.727e-04  3.212e-04    0.538 0.590802    
## PC136        6.016e-04  3.252e-04    1.850 0.064343 .  
## PC137       -9.984e-04  3.234e-04   -3.087 0.002029 ** 
## PC138        6.008e-04  3.267e-04    1.839 0.065932 .  
## PC139       -4.006e-04  3.240e-04   -1.236 0.216373    
## PC140       -5.621e-04  3.247e-04   -1.731 0.083454 .  
## PC141        5.247e-04  3.235e-04    1.622 0.104854    
## PC142        2.124e-04  3.248e-04    0.654 0.513025    
## PC143        3.476e-04  3.242e-04    1.072 0.283636    
## PC144        7.608e-04  3.261e-04    2.333 0.019666 *  
## PC145        4.973e-04  3.258e-04    1.526 0.126971    
## PC146        9.733e-04  3.290e-04    2.958 0.003107 ** 
## PC147       -1.885e-04  3.284e-04   -0.574 0.565982    
## PC148       -4.102e-04  3.252e-04   -1.261 0.207324    
## PC149       -1.014e-04  3.283e-04   -0.309 0.757431    
## PC150        3.459e-04  3.293e-04    1.050 0.293564    
## PC151        7.120e-04  3.276e-04    2.174 0.029787 *  
## PC152       -4.921e-04  3.285e-04   -1.498 0.134224    
## PC153        2.756e-04  3.308e-04    0.833 0.404865    
## PC154       -5.758e-04  3.301e-04   -1.745 0.081113 .  
## PC155        8.040e-04  3.301e-04    2.436 0.014880 *  
## PC156        1.114e-03  3.318e-04    3.357 0.000794 ***
## PC157       -4.443e-06  3.294e-04   -0.013 0.989239    
## PC158        9.148e-05  3.326e-04    0.275 0.783298    
## PC159        1.606e-03  3.321e-04    4.834 1.38e-06 ***
## PC160        1.774e-04  3.360e-04    0.528 0.597676    
## PC161       -5.438e-05  3.318e-04   -0.164 0.869818    
## PC162       -1.072e-03  3.337e-04   -3.214 0.001318 ** 
## PC163        6.251e-04  3.330e-04    1.877 0.060568 .  
## PC164        3.103e-04  3.351e-04    0.926 0.354463    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02562 on 5151 degrees of freedom
## Multiple R-squared:  0.3602, Adjusted R-squared:  0.3398 
## F-statistic: 17.68 on 164 and 5151 DF,  p-value: < 2.2e-16
# Diagnostics on the refit model (high-influence rows removed); returns the
# new per-observation Cook's distances.
cd.full2 = plot.diagnostics(model.full2, data.train2)

## [1] "Number of data points that have Cook's D > 4/n: 222"
## [1] "Number of data points that have Cook's D > 1: 0"
# much more normal residuals than before. 
# Checking to see if distributions are different and if so which variables
# High Leverage Plot: compare target distribution for flagged vs normal rows
plotData = data.train %>% 
  rownames_to_column() %>%
  mutate(type = ifelse(rowname %in% high.cd, 'High', 'Normal')) %>%
  dplyr::select(type, target = one_of(label.names))

ggplot(plotData, aes(x = type, y = target)) +
  geom_boxplot(fill = 'light blue', outlier.shape = NA) +
  scale_y_continuous(name = "Target Variable Values",
                     labels = scales::comma_format(accuracy = .1)) +
  theme_light() +
  ggtitle('Distribution of High Leverage Points and Normal  Points')

# 2 sample t-tests: does each feature differ between High and Normal groups?

plotData = data.train %>% 
  rownames_to_column() %>%
  mutate(type = ifelse(rowname %in% high.cd, 'High', 'Normal')) %>%
  dplyr::select(type, one_of(feature.names))

# One pooled-variance t-test per feature, comparing the two groups
comp.test = lapply(dplyr::select(plotData, one_of(feature.names)),
                   function(feat) t.test(feat ~ plotData$type, var.equal = TRUE))

# Keep only the features whose group means differ significantly
sig.comp = list.filter(comp.test, p.value < 0.05)
sapply(sig.comp, function(res) res[['p.value']])
##          PC1          PC6         PC11         PC23         PC25         PC26         PC28         PC31         PC33 
## 1.114429e-04 2.900174e-02 3.453559e-05 1.870981e-03 1.469834e-03 4.459918e-03 7.170787e-03 1.788345e-03 1.625765e-04 
##         PC40         PC41         PC42         PC45         PC57         PC58         PC75        PC131        PC161 
## 4.821597e-02 1.220165e-02 4.394526e-02 4.240457e-02 3.886254e-02 1.006641e-02 1.418026e-02 4.893946e-02 3.717655e-02
# Box plots restricted to the features flagged as significant above
mm = melt(plotData, id = c('type')) %>% filter(variable %in% names(sig.comp))

ggplot(mm, aes(x = type, y = value)) +
  geom_boxplot() +
  facet_wrap(~variable, ncol = 5, scales = 'free_y') +
  scale_y_continuous(name = "values", labels = scales::comma_format(accuracy = .1)) +
  ggtitle('Distribution of High Leverage Points and Normal Points')

# Distribution (box) Plots across every feature, faceted
mm = melt(plotData, id = c('type'))

ggplot(mm, aes(x = type, y = value)) +
  geom_boxplot() +
  facet_wrap(~variable, ncol = 8, scales = 'free_y') +
  scale_y_continuous(name = "values", labels = scales::comma_format(accuracy = .1)) +
  ggtitle('Distribution of High Leverage Points and Normal Points')

Grand Means Model

# Intercept-only (grand mean) baseline model for comparison
model.null = lm(grand.mean.formula, data = data.train)
summary(model.null)
## 
## Call:
## lm(formula = grand.mean.formula, data = data.train)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.114932 -0.023964 -0.003377  0.020682  0.190380 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.0968082  0.0004871    4304   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0364 on 5583 degrees of freedom

Variable Selection

Basic: http://www.stat.columbia.edu/~martin/W2024/R10.pdf Cross Validation + Other Metrics: http://www.sthda.com/english/articles/37-model-selection-essentials-in-r/154-stepwise-regression-essentials-in-r/

Forward Selection with CV

Train

if (isTRUE(algo.forward.caret)){
  # Fixed seed so the CV folds (and thus nvmax selection) are reproducible
  set.seed(1)
  fwd.fit = train.caret.glmselect(formula = formula
                                  , data = data.train
                                  , method = "leapForward"
                                  , feature.names = feature.names)
  # Expose the fitted model and selected subset size for later evaluation
  model.forward = fwd.fit$model
  id = fwd.fit$id
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 110 on full training set
## [1] "All models results"
##     nvmax       RMSE   Rsquared        MAE      RMSESD RsquaredSD        MAESD
## 1       1 0.03477291 0.08867342 0.02692328 0.001241527 0.02694524 0.0007080957
## 2       2 0.03430526 0.11280445 0.02659499 0.001382164 0.03177241 0.0008448670
## 3       3 0.03397028 0.13012834 0.02629226 0.001480367 0.03628599 0.0009228758
## 4       4 0.03357202 0.15065682 0.02594497 0.001495727 0.03862072 0.0008898614
## 5       5 0.03329810 0.16453122 0.02572753 0.001417074 0.04025876 0.0008731997
## 6       6 0.03307787 0.17576857 0.02556677 0.001504730 0.04328083 0.0009446991
## 7       7 0.03306407 0.17615661 0.02557869 0.001452171 0.03968875 0.0009114778
## 8       8 0.03295739 0.18134530 0.02547827 0.001388292 0.03792930 0.0008274437
## 9       9 0.03287939 0.18540661 0.02543399 0.001348211 0.03725882 0.0008444702
## 10     10 0.03275470 0.19146665 0.02537654 0.001412011 0.03832877 0.0008875603
## 11     11 0.03263033 0.19759284 0.02525113 0.001364011 0.03659311 0.0008756810
## 12     12 0.03257233 0.20048903 0.02523311 0.001354071 0.03672316 0.0008542372
## 13     13 0.03251170 0.20352002 0.02521402 0.001350424 0.03679330 0.0008351289
## 14     14 0.03244689 0.20662371 0.02517335 0.001343122 0.03644511 0.0008246527
## 15     15 0.03237508 0.21024129 0.02509782 0.001358417 0.03834147 0.0008435020
## 16     16 0.03229967 0.21374950 0.02503485 0.001309698 0.03485740 0.0007989186
## 17     17 0.03220992 0.21809658 0.02496221 0.001315084 0.03542000 0.0007926923
## 18     18 0.03211348 0.22278477 0.02488082 0.001329096 0.03586858 0.0008024469
## 19     19 0.03212837 0.22212585 0.02490779 0.001341751 0.03746177 0.0008003150
## 20     20 0.03213781 0.22162102 0.02492734 0.001312032 0.03662545 0.0008052886
## 21     21 0.03213292 0.22181018 0.02493871 0.001309637 0.03648529 0.0008097871
## 22     22 0.03216415 0.22035170 0.02496546 0.001314688 0.03606900 0.0007887597
## 23     23 0.03218004 0.21978765 0.02499320 0.001345886 0.03797314 0.0008001592
## 24     24 0.03217962 0.21989948 0.02499754 0.001375751 0.03808737 0.0008155772
## 25     25 0.03216149 0.22080358 0.02497778 0.001375812 0.03724828 0.0008036009
## 26     26 0.03212754 0.22246909 0.02494849 0.001386134 0.03808964 0.0008222826
## 27     27 0.03212246 0.22285030 0.02494429 0.001391138 0.03842345 0.0008183642
## 28     28 0.03212054 0.22291448 0.02492916 0.001365167 0.03745133 0.0007960654
## 29     29 0.03213711 0.22235720 0.02493668 0.001391574 0.03860505 0.0008025450
## 30     30 0.03212025 0.22308219 0.02492951 0.001388672 0.03889539 0.0008091720
## 31     31 0.03208205 0.22473091 0.02492124 0.001375580 0.03945830 0.0008133107
## 32     32 0.03207245 0.22525612 0.02491403 0.001391883 0.04080784 0.0008220509
## 33     33 0.03205101 0.22624599 0.02491031 0.001414465 0.04119436 0.0008491636
## 34     34 0.03204038 0.22659548 0.02490480 0.001410639 0.03965959 0.0008407845
## 35     35 0.03203920 0.22676001 0.02490051 0.001440626 0.04120706 0.0008483916
## 36     36 0.03204357 0.22658775 0.02490517 0.001438758 0.04130931 0.0008377819
## 37     37 0.03205419 0.22603709 0.02492207 0.001411729 0.03992416 0.0008175293
## 38     38 0.03205665 0.22597332 0.02491471 0.001416434 0.04021166 0.0008146284
## 39     39 0.03205383 0.22615437 0.02489986 0.001412482 0.04047099 0.0008314997
## 40     40 0.03204261 0.22671345 0.02489137 0.001406340 0.04047229 0.0008341381
## 41     41 0.03204194 0.22681627 0.02489759 0.001388689 0.04040147 0.0008204343
## 42     42 0.03206928 0.22555906 0.02490955 0.001403987 0.04095464 0.0008232951
## 43     43 0.03206459 0.22584693 0.02490599 0.001421581 0.04094682 0.0008400139
## 44     44 0.03208054 0.22514515 0.02491116 0.001424649 0.04083853 0.0008497526
## 45     45 0.03209622 0.22450274 0.02491986 0.001435414 0.04092379 0.0008529708
## 46     46 0.03209956 0.22434653 0.02491289 0.001441379 0.04081349 0.0008540198
## 47     47 0.03209368 0.22467742 0.02492201 0.001431325 0.04050589 0.0008697596
## 48     48 0.03209705 0.22454075 0.02492311 0.001417384 0.04056557 0.0008803787
## 49     49 0.03210387 0.22430303 0.02493020 0.001422928 0.04096309 0.0008941356
## 50     50 0.03210893 0.22418459 0.02494379 0.001433631 0.04040852 0.0008870796
## 51     51 0.03211367 0.22395579 0.02494293 0.001410433 0.03937421 0.0008857971
## 52     52 0.03210812 0.22425578 0.02493588 0.001435740 0.03983291 0.0008981506
## 53     53 0.03213614 0.22308290 0.02495360 0.001421114 0.03948345 0.0008819039
## 54     54 0.03211956 0.22381943 0.02493168 0.001419330 0.03933501 0.0008807096
## 55     55 0.03212483 0.22349935 0.02494206 0.001407913 0.03907454 0.0008742864
## 56     56 0.03211988 0.22366659 0.02493888 0.001390261 0.03823314 0.0008750261
## 57     57 0.03211959 0.22379649 0.02495186 0.001411565 0.03936397 0.0008980209
## 58     58 0.03211239 0.22416596 0.02494613 0.001415538 0.03898629 0.0008897171
## 59     59 0.03213626 0.22315172 0.02496361 0.001426162 0.03930488 0.0008823017
## 60     60 0.03212702 0.22361709 0.02494877 0.001416076 0.03903726 0.0008765432
## 61     61 0.03210096 0.22489597 0.02492605 0.001439037 0.04028342 0.0008964696
## 62     62 0.03210095 0.22492682 0.02493399 0.001438011 0.04009270 0.0008861095
## 63     63 0.03209720 0.22513292 0.02493683 0.001431737 0.04002607 0.0008904852
## 64     64 0.03208974 0.22549962 0.02494178 0.001448366 0.04069485 0.0009083664
## 65     65 0.03207369 0.22624304 0.02492354 0.001446947 0.04059217 0.0009086123
## 66     66 0.03208122 0.22592667 0.02492976 0.001445905 0.04036995 0.0009069383
## 67     67 0.03208651 0.22577119 0.02492386 0.001467726 0.04172392 0.0009210062
## 68     68 0.03208296 0.22600295 0.02492151 0.001460244 0.04121981 0.0009289866
## 69     69 0.03208365 0.22601794 0.02492840 0.001460697 0.04108034 0.0009501540
## 70     70 0.03206845 0.22667152 0.02491273 0.001459194 0.04109847 0.0009502501
## 71     71 0.03207832 0.22630140 0.02492499 0.001464890 0.04120853 0.0009532068
## 72     72 0.03207177 0.22660594 0.02492913 0.001478852 0.04139493 0.0009545841
## 73     73 0.03207884 0.22627552 0.02493485 0.001469068 0.04036586 0.0009346723
## 74     74 0.03207339 0.22659491 0.02493149 0.001463956 0.04019921 0.0009356794
## 75     75 0.03206829 0.22688087 0.02492237 0.001474386 0.04053992 0.0009318329
## 76     76 0.03206466 0.22708593 0.02491883 0.001467288 0.04050069 0.0009265166
## 77     77 0.03206180 0.22727944 0.02491406 0.001478382 0.04097980 0.0009401027
## 78     78 0.03206572 0.22712987 0.02492160 0.001490180 0.04163382 0.0009654985
## 79     79 0.03206475 0.22718500 0.02491609 0.001497328 0.04179955 0.0009693802
## 80     80 0.03205612 0.22759815 0.02490265 0.001502915 0.04195964 0.0009844772
## 81     81 0.03205316 0.22773533 0.02489855 0.001505466 0.04189693 0.0009817560
## 82     82 0.03204938 0.22791273 0.02489471 0.001512247 0.04196206 0.0009838911
## 83     83 0.03202912 0.22879362 0.02487673 0.001523331 0.04242504 0.0009910866
## 84     84 0.03202471 0.22898440 0.02487507 0.001511604 0.04207290 0.0009775806
## 85     85 0.03201666 0.22929142 0.02486827 0.001505642 0.04167351 0.0009698796
## 86     86 0.03202020 0.22919691 0.02487510 0.001505318 0.04148260 0.0009797739
## 87     87 0.03202049 0.22920823 0.02486943 0.001513029 0.04154932 0.0009816358
## 88     88 0.03200355 0.22995467 0.02485315 0.001509959 0.04129271 0.0009839007
## 89     89 0.03199615 0.23030017 0.02485287 0.001494104 0.04064939 0.0009802010
## 90     90 0.03199122 0.23047923 0.02485128 0.001497331 0.04054004 0.0009862718
## 91     91 0.03199047 0.23048736 0.02484950 0.001492883 0.04004239 0.0009767817
## 92     92 0.03198692 0.23068711 0.02485162 0.001503173 0.04046130 0.0009824728
## 93     93 0.03199563 0.23033078 0.02486512 0.001501557 0.04050489 0.0009831600
## 94     94 0.03199797 0.23019872 0.02487101 0.001491583 0.03987966 0.0009803743
## 95     95 0.03199335 0.23047009 0.02486655 0.001497794 0.03984521 0.0009876680
## 96     96 0.03199746 0.23032450 0.02486091 0.001503405 0.04010818 0.0009930313
## 97     97 0.03199439 0.23046756 0.02485649 0.001523995 0.04066097 0.0010012025
## 98     98 0.03199664 0.23033950 0.02485843 0.001524000 0.04051750 0.0009991824
## 99     99 0.03199963 0.23022039 0.02485740 0.001532018 0.04062958 0.0010081561
## 100   100 0.03199394 0.23046923 0.02485372 0.001528450 0.04054620 0.0010078808
## 101   101 0.03199103 0.23058576 0.02485530 0.001521794 0.03996122 0.0009983527
## 102   102 0.03199172 0.23056819 0.02485462 0.001524524 0.04016561 0.0009981462
## 103   103 0.03197871 0.23114909 0.02484476 0.001525203 0.04043456 0.0009987292
## 104   104 0.03197442 0.23134640 0.02484161 0.001529584 0.04039329 0.0010055524
## 105   105 0.03197723 0.23123436 0.02483659 0.001532022 0.04045956 0.0010119291
## 106   106 0.03197358 0.23140859 0.02483619 0.001533794 0.04089880 0.0010164772
## 107   107 0.03197305 0.23144303 0.02483680 0.001539769 0.04097211 0.0010192725
## 108   108 0.03197029 0.23154870 0.02483612 0.001541826 0.04123377 0.0010298468
## 109   109 0.03197142 0.23147689 0.02483488 0.001538675 0.04102166 0.0010223219
## 110   110 0.03196823 0.23159643 0.02482476 0.001529341 0.04067338 0.0010134655
## 111   111 0.03197336 0.23138431 0.02482805 0.001533506 0.04106195 0.0010142136
## 112   112 0.03197136 0.23152606 0.02482293 0.001534555 0.04113854 0.0010085159
## 113   113 0.03197461 0.23137605 0.02482809 0.001531756 0.04104360 0.0010053112
## 114   114 0.03197604 0.23132361 0.02482609 0.001528680 0.04094427 0.0010053142
## 115   115 0.03197837 0.23122685 0.02483174 0.001530312 0.04067612 0.0010056174
## 116   116 0.03197979 0.23118245 0.02483204 0.001530900 0.04075648 0.0010049239
## 117   117 0.03198423 0.23099071 0.02483741 0.001524602 0.04059462 0.0010025404
## 118   118 0.03198748 0.23086744 0.02484001 0.001524930 0.04057813 0.0009998928
## 119   119 0.03199519 0.23053784 0.02484830 0.001525864 0.04054937 0.0010003865
## 120   120 0.03198976 0.23078268 0.02484224 0.001526166 0.04039139 0.0010019231
## 121   121 0.03198885 0.23082304 0.02484123 0.001523881 0.04011096 0.0009993482
## 122   122 0.03199615 0.23052753 0.02484857 0.001524203 0.04019993 0.0009978955
## 123   123 0.03199822 0.23042646 0.02484948 0.001520157 0.04005783 0.0009911628
## 124   124 0.03200088 0.23032480 0.02485123 0.001522284 0.04027961 0.0009901903
## 125   125 0.03199964 0.23036807 0.02484816 0.001522779 0.04032824 0.0009907574
## 126   126 0.03200003 0.23038047 0.02484480 0.001523229 0.04039877 0.0009974876
## 127   127 0.03200552 0.23015056 0.02484835 0.001524890 0.04057208 0.0009998711
## 128   128 0.03200849 0.23002288 0.02485181 0.001528606 0.04074557 0.0010019670
## 129   129 0.03200626 0.23011574 0.02484972 0.001527317 0.04070563 0.0010032649
## 130   130 0.03200723 0.23010004 0.02485102 0.001527097 0.04074447 0.0010069934
## 131   131 0.03201137 0.22992104 0.02485348 0.001528458 0.04078396 0.0010063671
## 132   132 0.03200983 0.22999040 0.02485419 0.001530295 0.04084604 0.0010071048
## 133   133 0.03201344 0.22984440 0.02485941 0.001533387 0.04108508 0.0010103116
## 134   134 0.03201365 0.22984281 0.02486109 0.001534540 0.04101024 0.0010112674
## 135   135 0.03201087 0.22997089 0.02485950 0.001532274 0.04101278 0.0010095930
## 136   136 0.03200837 0.23008806 0.02485724 0.001532864 0.04107810 0.0010121125
## 137   137 0.03200963 0.23002545 0.02485927 0.001531753 0.04099892 0.0010112603
## 138   138 0.03201521 0.22978175 0.02486343 0.001529153 0.04079978 0.0010098656
## 139   139 0.03201599 0.22975859 0.02486464 0.001531158 0.04090856 0.0010121685
## 140   140 0.03201472 0.22979583 0.02486228 0.001529158 0.04087428 0.0010116301
## 141   141 0.03201762 0.22966093 0.02486391 0.001527853 0.04080573 0.0010124458
## 142   142 0.03201652 0.22972501 0.02486216 0.001529424 0.04087584 0.0010118080
## 143   143 0.03201593 0.22975229 0.02486209 0.001532447 0.04099954 0.0010146881
## 144   144 0.03201685 0.22972231 0.02486141 0.001533976 0.04103142 0.0010153620
## 145   145 0.03201745 0.22970677 0.02486172 0.001533723 0.04103160 0.0010156287
## 146   146 0.03201830 0.22966711 0.02486291 0.001532618 0.04100083 0.0010146294
## 147   147 0.03201959 0.22961148 0.02486442 0.001533643 0.04103777 0.0010153806
## 148   148 0.03202083 0.22956075 0.02486592 0.001532662 0.04098088 0.0010129589
## 149   149 0.03202186 0.22952486 0.02486700 0.001534175 0.04101414 0.0010138945
## 150   150 0.03202152 0.22954559 0.02486707 0.001536138 0.04108821 0.0010166699
## 151   151 0.03202283 0.22948672 0.02486717 0.001536222 0.04104770 0.0010162828
## 152   152 0.03202294 0.22948434 0.02486700 0.001536319 0.04107400 0.0010171523
## 153   153 0.03202280 0.22949054 0.02486656 0.001536304 0.04106531 0.0010170736
## 154   154 0.03202125 0.22955754 0.02486463 0.001536783 0.04108383 0.0010179749
## 155   155 0.03202162 0.22953372 0.02486414 0.001537131 0.04110253 0.0010177444
## 156   156 0.03202231 0.22950264 0.02486497 0.001537206 0.04114068 0.0010179990
## 157   157 0.03202253 0.22949211 0.02486553 0.001535889 0.04110959 0.0010174766
## 158   158 0.03202272 0.22948379 0.02486528 0.001536237 0.04110658 0.0010176311
## 159   159 0.03202318 0.22946528 0.02486570 0.001535923 0.04109475 0.0010176663
## 160   160 0.03202287 0.22948068 0.02486567 0.001536339 0.04111202 0.0010178004
## 161   161 0.03202274 0.22948616 0.02486571 0.001536346 0.04111327 0.0010175342
## 162   162 0.03202292 0.22947816 0.02486583 0.001536473 0.04111787 0.0010178911
## 163   163 0.03202284 0.22948087 0.02486558 0.001536518 0.04112019 0.0010179683
## 164   164 0.03202267 0.22948948 0.02486541 0.001536708 0.04113085 0.0010181546
## [1] "Best Model"
##     nvmax
## 110   110

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients of final model:"
##                  Estimate         2.5 %        97.5 %
## (Intercept)  2.096921e+00  2.096096e+00  2.097746e+00
## PC1         -4.820829e-04 -5.540102e-04 -4.101556e-04
## PC2         -9.439758e-04 -1.017022e-03 -8.709292e-04
## PC3         -4.376253e-04 -5.110635e-04 -3.641872e-04
## PC4         -3.485515e-04 -4.229921e-04 -2.741108e-04
## PC5          2.305793e-04  1.538579e-04  3.073007e-04
## PC6         -9.916849e-05 -1.759127e-04 -2.242430e-05
## PC7         -2.034675e-04 -2.818330e-04 -1.251020e-04
## PC8         -3.732177e-05 -1.175225e-04  4.287892e-05
## PC9         -5.229907e-05 -1.344202e-04  2.982208e-05
## PC11        -5.388938e-04 -6.279221e-04 -4.498656e-04
## PC12        -5.049829e-04 -5.988052e-04 -4.111606e-04
## PC13         3.431100e-04  2.475465e-04  4.386736e-04
## PC14         2.527666e-04  1.537902e-04  3.517430e-04
## PC16         3.558417e-04  2.538653e-04  4.578182e-04
## PC17        -1.998529e-04 -3.074757e-04 -9.223015e-05
## PC18        -3.654310e-04 -4.773739e-04 -2.534881e-04
## PC19         4.317135e-05 -7.010991e-05  1.564526e-04
## PC20         4.101492e-04  2.862502e-04  5.340482e-04
## PC21         8.180626e-05 -4.747785e-05  2.110904e-04
## PC22         9.282618e-05 -1.090969e-04  2.947493e-04
## PC23         2.053985e-04 -4.390229e-05  4.546993e-04
## PC24        -7.967536e-04 -1.087088e-03 -5.064190e-04
## PC25         2.524151e-04 -7.599813e-05  5.808283e-04
## PC26         3.833125e-04  4.667886e-05  7.199461e-04
## PC27         2.715470e-04 -6.361989e-05  6.067139e-04
## PC29         3.679176e-04 -5.941506e-06  7.417766e-04
## PC32        -7.123956e-04 -1.128028e-03 -2.967633e-04
## PC33         7.039065e-04  2.795701e-04  1.128243e-03
## PC34         1.099756e-03  6.511037e-04  1.548408e-03
## PC37        -3.603607e-04 -8.610729e-04  1.403515e-04
## PC38         2.032648e-04 -3.141561e-04  7.206857e-04
## PC39        -2.080906e-04 -7.413170e-04  3.251358e-04
## PC42        -2.251949e-04 -7.889242e-04  3.385343e-04
## PC44         6.421781e-04  7.553640e-05  1.208820e-03
## PC45        -2.968457e-04 -8.659781e-04  2.722868e-04
## PC47        -4.846403e-04 -1.054791e-03  8.551069e-05
## PC49         3.431287e-04 -2.384184e-04  9.246758e-04
## PC57        -7.572567e-04 -1.370314e-03 -1.441993e-04
## PC59         9.818223e-04  3.716729e-04  1.591972e-03
## PC62        -3.745738e-04 -1.004500e-03  2.553524e-04
## PC63        -7.029549e-04 -1.328927e-03 -7.698276e-05
## PC64        -9.023538e-04 -1.535550e-03 -2.691578e-04
## PC66        -4.331374e-04 -1.073173e-03  2.068977e-04
## PC68         4.950411e-04 -1.474936e-04  1.137576e-03
## PC71         5.213597e-04 -1.282802e-04  1.171000e-03
## PC73         4.679612e-04 -1.857211e-04  1.121643e-03
## PC74        -6.580551e-04 -1.315861e-03 -2.495721e-07
## PC75        -8.829193e-04 -1.546441e-03 -2.193976e-04
## PC77         4.911264e-04 -1.697421e-04  1.151995e-03
## PC78         2.765648e-04 -3.878102e-04  9.409398e-04
## PC79         5.663015e-04 -1.042508e-04  1.236854e-03
## PC81         7.267212e-04  5.523200e-05  1.398210e-03
## PC82         4.337908e-04 -2.533935e-04  1.120975e-03
## PC83        -7.194573e-04 -1.403123e-03 -3.579173e-05
## PC84         7.996992e-04  1.137021e-04  1.485696e-03
## PC85         1.123558e-03  4.290312e-04  1.818084e-03
## PC87         1.719575e-03  1.029988e-03  2.409162e-03
## PC88        -1.139771e-03 -1.842020e-03 -4.375212e-04
## PC89        -5.395392e-04 -1.239624e-03  1.605451e-04
## PC90        -5.072518e-04 -1.208860e-03  1.943566e-04
## PC92         2.749095e-04 -4.208083e-04  9.706273e-04
## PC94        -9.179035e-04 -1.626069e-03 -2.097378e-04
## PC96        -4.613714e-04 -1.176317e-03  2.535744e-04
## PC97        -5.007501e-04 -1.211172e-03  2.096717e-04
## PC98        -4.817032e-04 -1.191269e-03  2.278624e-04
## PC99        -4.487862e-04 -1.159985e-03  2.624125e-04
## PC102       -5.760555e-04 -1.289414e-03  1.373025e-04
## PC104       -6.682706e-04 -1.388170e-03  5.162844e-05
## PC105        4.955110e-04 -2.268609e-04  1.217883e-03
## PC106        1.242436e-03  5.179929e-04  1.966879e-03
## PC107        6.103805e-04 -1.150509e-04  1.335812e-03
## PC109        5.375521e-04 -1.881497e-04  1.263254e-03
## PC110       -5.635217e-04 -1.293753e-03  1.667097e-04
## PC111       -8.082237e-04 -1.541821e-03 -7.462696e-05
## PC113        3.104138e-04 -4.253179e-04  1.046145e-03
## PC114       -7.563989e-04 -1.484119e-03 -2.867898e-05
## PC115       -1.655666e-03 -2.389151e-03 -9.221812e-04
## PC118        7.180344e-04 -2.119439e-05  1.457263e-03
## PC119       -5.373825e-04 -1.276933e-03  2.021678e-04
## PC121       -4.009325e-04 -1.143311e-03  3.414462e-04
## PC122        4.979362e-04 -2.400266e-04  1.235899e-03
## PC123       -5.413550e-04 -1.285929e-03  2.032187e-04
## PC125        4.774826e-04 -2.684608e-04  1.223426e-03
## PC128       -9.910001e-04 -1.736981e-03 -2.450190e-04
## PC130        4.081361e-04 -3.380762e-04  1.154348e-03
## PC131       -1.474609e-03 -2.219450e-03 -7.297668e-04
## PC132        3.074585e-04 -4.456296e-04  1.060547e-03
## PC134        9.756772e-04  2.242233e-04  1.727131e-03
## PC135        4.401274e-04 -3.112522e-04  1.191507e-03
## PC136        5.537986e-04 -2.028168e-04  1.310414e-03
## PC137       -7.524878e-04 -1.508038e-03  3.062572e-06
## PC138        5.529543e-04 -2.063313e-04  1.312240e-03
## PC139       -7.448016e-04 -1.499809e-03  1.020571e-05
## PC140       -3.901045e-04 -1.147415e-03  3.672062e-04
## PC141        3.924341e-04 -3.613440e-04  1.146212e-03
## PC143        3.158275e-04 -4.421895e-04  1.073844e-03
## PC144        1.024203e-03  2.633283e-04  1.785078e-03
## PC146        5.923295e-04 -1.751615e-04  1.359820e-03
## PC148       -5.130788e-04 -1.275746e-03  2.495885e-04
## PC151        6.756695e-04 -9.189504e-05  1.443234e-03
## PC152       -6.885784e-04 -1.455780e-03  7.862350e-05
## PC153        4.660142e-04 -3.072961e-04  1.239325e-03
## PC154       -8.514425e-04 -1.620867e-03 -8.201780e-05
## PC155        1.067769e-03  2.971624e-04  1.838375e-03
## PC156        1.368886e-03  5.966260e-04  2.141146e-03
## PC159        2.124070e-03  1.347256e-03  2.900884e-03
## PC161        3.374139e-04 -4.362380e-04  1.111066e-03
## PC162       -1.145268e-03 -1.925606e-03 -3.649295e-04
## PC163        6.712821e-04 -1.039883e-04  1.446553e-03
## PC164        3.014931e-04 -4.803026e-04  1.083289e-03

Test (Forward Selection model on the held-out set)

# Evaluate the trained forward-selection (leapForward) model on the
# held-out test set; test.model() prints a summary of predictions and
# the test MSE/RMSE on both the transformed and original scales.
# isTRUE() is preferred over `== TRUE`: it yields FALSE (not NA or an
# error) when the flag is NA, NULL, or non-logical.
if (isTRUE(algo.forward.caret)) {
    test.model(model = model.forward, test = data.test
             ,method = 'leapForward', subopt = NULL
             ,formula = formula, feature.names = feature.names, label.names = label.names
             ,id = id
             # NOTE(review): `t` shadows base::t here — presumably a
             # transformation label/function defined earlier in the file;
             # verify against its definition.
             ,draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.024   2.087   2.101   2.097   2.110   2.141 
## [1] "leapForward  Test MSE: 0.00102089613568136"
## [1] "leapForward  Test RMSE: 0.0319514653135244"
## [1] "leapForward  Test MSE (Org Scale): 90.4181132605647"
## [1] "leapForward  Test RMSE (Org Scale): 9.50884394974304"

Backward Elimination with CV

Train

# Train a backward-elimination (leapBackward) model via caret with
# cross-validation over nvmax, using the project wrapper
# train.caret.glmselect(). The wrapper returns a list with the fitted
# caret model ($model) and an identifier ($id) used by the later
# test/reporting steps.
# isTRUE() is preferred over `== TRUE` (robust to NA/NULL flags), and
# `<-` is the conventional R assignment operator.
if (isTRUE(algo.backward.caret)) {
  set.seed(1)  # fixed seed so the CV fold assignment is reproducible
  returned <- train.caret.glmselect(formula = formula
                                   ,data =  data.train
                                   ,method = "leapBackward"
                                   ,feature.names =  feature.names)
  model.backward <- returned$model
  id <- returned$id
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 110 on full training set
## [1] "All models results"
##     nvmax       RMSE   Rsquared        MAE      RMSESD RsquaredSD        MAESD
## 1       1 0.03477291 0.08867342 0.02692328 0.001241527 0.02694524 0.0007080957
## 2       2 0.03430526 0.11280445 0.02659499 0.001382164 0.03177241 0.0008448670
## 3       3 0.03397028 0.13012834 0.02629226 0.001480367 0.03628599 0.0009228758
## 4       4 0.03357202 0.15065682 0.02594497 0.001495727 0.03862072 0.0008898614
## 5       5 0.03329810 0.16453122 0.02572753 0.001417074 0.04025876 0.0008731997
## 6       6 0.03307787 0.17576857 0.02556677 0.001504730 0.04328083 0.0009446991
## 7       7 0.03306407 0.17615661 0.02557869 0.001452171 0.03968875 0.0009114778
## 8       8 0.03295739 0.18134530 0.02547827 0.001388292 0.03792930 0.0008274437
## 9       9 0.03287939 0.18540661 0.02543399 0.001348211 0.03725882 0.0008444702
## 10     10 0.03275470 0.19146665 0.02537654 0.001412011 0.03832877 0.0008875603
## 11     11 0.03263033 0.19759284 0.02525113 0.001364011 0.03659311 0.0008756810
## 12     12 0.03257233 0.20048903 0.02523311 0.001354071 0.03672316 0.0008542372
## 13     13 0.03251170 0.20352002 0.02521402 0.001350424 0.03679330 0.0008351289
## 14     14 0.03243637 0.20713813 0.02516582 0.001326182 0.03632042 0.0008198317
## 15     15 0.03236399 0.21078002 0.02508994 0.001340624 0.03820307 0.0008389046
## 16     16 0.03229713 0.21389724 0.02502812 0.001305601 0.03482762 0.0007953255
## 17     17 0.03220992 0.21809658 0.02496221 0.001315084 0.03542000 0.0007926923
## 18     18 0.03211348 0.22278477 0.02488082 0.001329096 0.03586858 0.0008024469
## 19     19 0.03212837 0.22212585 0.02490779 0.001341751 0.03746177 0.0008003150
## 20     20 0.03213781 0.22162102 0.02492734 0.001312032 0.03662545 0.0008052886
## 21     21 0.03213292 0.22181018 0.02493871 0.001309637 0.03648529 0.0008097871
## 22     22 0.03216415 0.22035170 0.02496546 0.001314688 0.03606900 0.0007887597
## 23     23 0.03218004 0.21978765 0.02499320 0.001345886 0.03797314 0.0008001592
## 24     24 0.03217962 0.21989948 0.02499754 0.001375751 0.03808737 0.0008155772
## 25     25 0.03214851 0.22141516 0.02496231 0.001390258 0.03864470 0.0008086626
## 26     26 0.03212614 0.22258862 0.02494025 0.001418549 0.03972298 0.0008370979
## 27     27 0.03210861 0.22350784 0.02493115 0.001397575 0.03933920 0.0008293077
## 28     28 0.03211566 0.22313668 0.02492613 0.001363803 0.03747320 0.0007978509
## 29     29 0.03213711 0.22235720 0.02493668 0.001391574 0.03860505 0.0008025450
## 30     30 0.03211669 0.22325500 0.02493446 0.001389989 0.03872350 0.0008086606
## 31     31 0.03207909 0.22488225 0.02492618 0.001376529 0.03929919 0.0008131397
## 32     32 0.03207245 0.22525612 0.02491403 0.001391883 0.04080784 0.0008220509
## 33     33 0.03205101 0.22624599 0.02491031 0.001414465 0.04119436 0.0008491636
## 34     34 0.03204038 0.22659548 0.02490480 0.001410639 0.03965959 0.0008407845
## 35     35 0.03205638 0.22595383 0.02491509 0.001421328 0.03969448 0.0008289471
## 36     36 0.03206960 0.22534588 0.02492428 0.001406537 0.03919583 0.0008087184
## 37     37 0.03205758 0.22583707 0.02492707 0.001397671 0.03917762 0.0008094775
## 38     38 0.03205322 0.22611470 0.02491297 0.001412162 0.03992544 0.0008194599
## 39     39 0.03204781 0.22642910 0.02489963 0.001412050 0.04036948 0.0008314135
## 40     40 0.03204261 0.22671345 0.02489137 0.001406340 0.04047229 0.0008341381
## 41     41 0.03204194 0.22681627 0.02489759 0.001388689 0.04040147 0.0008204343
## 42     42 0.03206928 0.22555906 0.02490955 0.001403987 0.04095464 0.0008232951
## 43     43 0.03206459 0.22584693 0.02490599 0.001421581 0.04094682 0.0008400139
## 44     44 0.03208054 0.22514515 0.02491116 0.001424649 0.04083853 0.0008497526
## 45     45 0.03210064 0.22431640 0.02492423 0.001435872 0.04100009 0.0008547758
## 46     46 0.03210628 0.22402150 0.02491449 0.001439508 0.04058852 0.0008573306
## 47     47 0.03209280 0.22471922 0.02491902 0.001432224 0.04059939 0.0008732838
## 48     48 0.03210532 0.22423489 0.02493032 0.001430459 0.04100737 0.0008879827
## 49     49 0.03211163 0.22404680 0.02493360 0.001434442 0.04132732 0.0008979772
## 50     50 0.03210694 0.22425560 0.02494084 0.001434283 0.04063111 0.0008854307
## 51     51 0.03211647 0.22386847 0.02494583 0.001417559 0.03972872 0.0008902399
## 52     52 0.03210766 0.22430848 0.02493085 0.001435295 0.03978106 0.0008894305
## 53     53 0.03213614 0.22308290 0.02495360 0.001421114 0.03948345 0.0008819039
## 54     54 0.03211956 0.22381943 0.02493168 0.001419330 0.03933501 0.0008807096
## 55     55 0.03212483 0.22349935 0.02494206 0.001407913 0.03907454 0.0008742864
## 56     56 0.03211216 0.22400631 0.02492988 0.001394593 0.03831464 0.0008699062
## 57     57 0.03211579 0.22398113 0.02494166 0.001413633 0.03940418 0.0008922376
## 58     58 0.03210192 0.22462028 0.02493502 0.001411151 0.03893947 0.0008804919
## 59     59 0.03213269 0.22331780 0.02495672 0.001422367 0.03917530 0.0008754991
## 60     60 0.03212672 0.22361511 0.02494886 0.001419111 0.03901162 0.0008757383
## 61     61 0.03211455 0.22429475 0.02493329 0.001449054 0.04001900 0.0008999164
## 62     62 0.03211156 0.22445435 0.02493655 0.001433003 0.03962380 0.0008864711
## 63     63 0.03210130 0.22495859 0.02494326 0.001431823 0.04007662 0.0008924303
## 64     64 0.03208969 0.22552763 0.02494012 0.001449938 0.04073208 0.0009102667
## 65     65 0.03207265 0.22631752 0.02492411 0.001445949 0.04051978 0.0009095516
## 66     66 0.03208134 0.22594272 0.02493487 0.001446019 0.04035424 0.0009152635
## 67     67 0.03208668 0.22581330 0.02493076 0.001467886 0.04168388 0.0009319928
## 68     68 0.03208124 0.22607877 0.02493223 0.001462110 0.04127941 0.0009433519
## 69     69 0.03207323 0.22648474 0.02491923 0.001472687 0.04156043 0.0009541251
## 70     70 0.03207017 0.22661973 0.02491569 0.001471120 0.04150434 0.0009530351
## 71     71 0.03207832 0.22630140 0.02492499 0.001464890 0.04120853 0.0009532068
## 72     72 0.03207177 0.22660594 0.02492913 0.001478852 0.04139493 0.0009545841
## 73     73 0.03208246 0.22610978 0.02493569 0.001469587 0.04032261 0.0009342418
## 74     74 0.03207333 0.22660173 0.02493100 0.001463948 0.04020081 0.0009359185
## 75     75 0.03206816 0.22688445 0.02492139 0.001472498 0.04059702 0.0009385040
## 76     76 0.03207009 0.22683524 0.02492349 0.001470099 0.04077888 0.0009345883
## 77     77 0.03206180 0.22727944 0.02491406 0.001478382 0.04097980 0.0009401027
## 78     78 0.03206461 0.22718097 0.02491839 0.001490932 0.04165749 0.0009645938
## 79     79 0.03206641 0.22712476 0.02491256 0.001496181 0.04177050 0.0009683861
## 80     80 0.03205529 0.22765676 0.02489975 0.001503489 0.04198825 0.0009836589
## 81     81 0.03204457 0.22811258 0.02489072 0.001500319 0.04184524 0.0009770287
## 82     82 0.03203653 0.22847008 0.02488121 0.001499180 0.04174717 0.0009735092
## 83     83 0.03202519 0.22896086 0.02487473 0.001515916 0.04228341 0.0009889464
## 84     84 0.03202412 0.22901156 0.02487772 0.001509442 0.04202499 0.0009777206
## 85     85 0.03201281 0.22945309 0.02487023 0.001499758 0.04157401 0.0009705243
## 86     86 0.03201595 0.22936249 0.02487838 0.001496303 0.04130307 0.0009805221
## 87     87 0.03200549 0.22984719 0.02486523 0.001495384 0.04121632 0.0009778452
## 88     88 0.03200260 0.23000738 0.02485790 0.001499264 0.04102508 0.0009852350
## 89     89 0.03198973 0.23058811 0.02485260 0.001496409 0.04044055 0.0009807616
## 90     90 0.03199463 0.23035628 0.02485844 0.001494202 0.04055486 0.0009777001
## 91     91 0.03199472 0.23032869 0.02485656 0.001490854 0.04014047 0.0009707329
## 92     92 0.03199470 0.23037548 0.02486261 0.001494151 0.04023059 0.0009690020
## 93     93 0.03200044 0.23012470 0.02487000 0.001495062 0.03987310 0.0009753613
## 94     94 0.03200030 0.23009599 0.02487314 0.001487746 0.03968025 0.0009761965
## 95     95 0.03199030 0.23060098 0.02486373 0.001500013 0.04007995 0.0009896515
## 96     96 0.03199764 0.23031777 0.02486092 0.001503197 0.04010516 0.0009930310
## 97     97 0.03199814 0.23028712 0.02485950 0.001519767 0.04058028 0.0009996734
## 98     98 0.03199203 0.23054318 0.02485312 0.001524204 0.04048117 0.0009986949
## 99     99 0.03199760 0.23030316 0.02485463 0.001532446 0.04055220 0.0010063278
## 100   100 0.03199562 0.23039090 0.02485536 0.001528618 0.04052124 0.0010069527
## 101   101 0.03199256 0.23051327 0.02485819 0.001521955 0.03993837 0.0009966870
## 102   102 0.03199172 0.23056819 0.02485462 0.001524524 0.04016561 0.0009981462
## 103   103 0.03197871 0.23114909 0.02484476 0.001525203 0.04043456 0.0009987292
## 104   104 0.03197442 0.23134640 0.02484161 0.001529584 0.04039329 0.0010055524
## 105   105 0.03197723 0.23123436 0.02483659 0.001532022 0.04045956 0.0010119291
## 106   106 0.03197358 0.23140859 0.02483619 0.001533794 0.04089880 0.0010164772
## 107   107 0.03197305 0.23144303 0.02483680 0.001539769 0.04097211 0.0010192725
## 108   108 0.03197343 0.23144234 0.02483566 0.001547970 0.04135446 0.0010292884
## 109   109 0.03197504 0.23134488 0.02483593 0.001545829 0.04117435 0.0010236430
## 110   110 0.03197190 0.23146242 0.02482630 0.001536594 0.04082764 0.0010154066
## 111   111 0.03197442 0.23135094 0.02482692 0.001535600 0.04110020 0.0010127780
## 112   112 0.03197246 0.23149132 0.02482195 0.001536747 0.04117865 0.0010072769
## 113   113 0.03197461 0.23137605 0.02482809 0.001531756 0.04104360 0.0010053112
## 114   114 0.03197604 0.23132361 0.02482609 0.001528680 0.04094427 0.0010053142
## 115   115 0.03197837 0.23122685 0.02483174 0.001530312 0.04067612 0.0010056174
## 116   116 0.03197979 0.23118245 0.02483204 0.001530900 0.04075648 0.0010049239
## 117   117 0.03198423 0.23099071 0.02483741 0.001524602 0.04059462 0.0010025404
## 118   118 0.03198748 0.23086744 0.02484001 0.001524930 0.04057813 0.0009998928
## 119   119 0.03199519 0.23053784 0.02484830 0.001525864 0.04054937 0.0010003865
## 120   120 0.03198976 0.23078268 0.02484224 0.001526166 0.04039139 0.0010019231
## 121   121 0.03198885 0.23082304 0.02484123 0.001523881 0.04011096 0.0009993482
## 122   122 0.03199615 0.23052753 0.02484857 0.001524203 0.04019993 0.0009978955
## 123   123 0.03199822 0.23042646 0.02484948 0.001520157 0.04005783 0.0009911628
## 124   124 0.03200088 0.23032480 0.02485123 0.001522284 0.04027961 0.0009901903
## 125   125 0.03199964 0.23036807 0.02484816 0.001522779 0.04032824 0.0009907574
## 126   126 0.03200003 0.23038047 0.02484480 0.001523229 0.04039877 0.0009974876
## 127   127 0.03200552 0.23015056 0.02484835 0.001524890 0.04057208 0.0009998711
## 128   128 0.03200849 0.23002288 0.02485181 0.001528606 0.04074557 0.0010019670
## 129   129 0.03200626 0.23011574 0.02484972 0.001527317 0.04070563 0.0010032649
## 130   130 0.03200723 0.23010004 0.02485102 0.001527097 0.04074447 0.0010069934
## 131   131 0.03201137 0.22992104 0.02485348 0.001528458 0.04078396 0.0010063671
## 132   132 0.03200983 0.22999040 0.02485419 0.001530295 0.04084604 0.0010071048
## 133   133 0.03201344 0.22984440 0.02485941 0.001533387 0.04108508 0.0010103116
## 134   134 0.03201365 0.22984281 0.02486109 0.001534540 0.04101024 0.0010112674
## 135   135 0.03201087 0.22997089 0.02485950 0.001532274 0.04101278 0.0010095930
## 136   136 0.03200837 0.23008806 0.02485724 0.001532864 0.04107810 0.0010121125
## 137   137 0.03200963 0.23002545 0.02485927 0.001531753 0.04099892 0.0010112603
## 138   138 0.03201521 0.22978175 0.02486343 0.001529153 0.04079978 0.0010098656
## 139   139 0.03201480 0.22979429 0.02486421 0.001529846 0.04086330 0.0010113660
## 140   140 0.03201422 0.22981594 0.02486244 0.001528604 0.04084881 0.0010119257
## 141   141 0.03201762 0.22966093 0.02486391 0.001527853 0.04080573 0.0010124458
## 142   142 0.03201652 0.22972501 0.02486216 0.001529424 0.04087584 0.0010118080
## 143   143 0.03201593 0.22975229 0.02486209 0.001532447 0.04099954 0.0010146881
## 144   144 0.03201685 0.22972231 0.02486141 0.001533976 0.04103142 0.0010153620
## 145   145 0.03201745 0.22970677 0.02486172 0.001533723 0.04103160 0.0010156287
## 146   146 0.03201830 0.22966711 0.02486291 0.001532618 0.04100083 0.0010146294
## 147   147 0.03201959 0.22961148 0.02486442 0.001533643 0.04103777 0.0010153806
## 148   148 0.03202083 0.22956075 0.02486592 0.001532662 0.04098088 0.0010129589
## 149   149 0.03202186 0.22952486 0.02486700 0.001534175 0.04101414 0.0010138945
## 150   150 0.03202152 0.22954559 0.02486707 0.001536138 0.04108821 0.0010166699
## 151   151 0.03202283 0.22948672 0.02486717 0.001536222 0.04104770 0.0010162828
## 152   152 0.03202294 0.22948434 0.02486700 0.001536319 0.04107400 0.0010171523
## 153   153 0.03202280 0.22949054 0.02486656 0.001536304 0.04106531 0.0010170736
## 154   154 0.03202125 0.22955754 0.02486463 0.001536783 0.04108383 0.0010179749
## 155   155 0.03202162 0.22953372 0.02486414 0.001537131 0.04110253 0.0010177444
## 156   156 0.03202231 0.22950264 0.02486497 0.001537206 0.04114068 0.0010179990
## 157   157 0.03202253 0.22949211 0.02486553 0.001535889 0.04110959 0.0010174766
## 158   158 0.03202272 0.22948379 0.02486528 0.001536237 0.04110658 0.0010176311
## 159   159 0.03202318 0.22946528 0.02486570 0.001535923 0.04109475 0.0010176663
## 160   160 0.03202287 0.22948068 0.02486567 0.001536339 0.04111202 0.0010178004
## 161   161 0.03202274 0.22948616 0.02486571 0.001536346 0.04111327 0.0010175342
## 162   162 0.03202292 0.22947816 0.02486583 0.001536473 0.04111787 0.0010178911
## 163   163 0.03202284 0.22948087 0.02486558 0.001536518 0.04112019 0.0010179683
## 164   164 0.03202267 0.22948948 0.02486541 0.001536708 0.04113085 0.0010181546
## [1] "Best Model"
##     nvmax
## 110   110

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients of final model:"
##                  Estimate         2.5 %        97.5 %
## (Intercept)  2.096921e+00  2.096096e+00  2.097746e+00
## PC1         -4.820829e-04 -5.540102e-04 -4.101556e-04
## PC2         -9.439758e-04 -1.017022e-03 -8.709292e-04
## PC3         -4.376253e-04 -5.110635e-04 -3.641872e-04
## PC4         -3.485515e-04 -4.229921e-04 -2.741108e-04
## PC5          2.305793e-04  1.538579e-04  3.073007e-04
## PC6         -9.916849e-05 -1.759127e-04 -2.242430e-05
## PC7         -2.034675e-04 -2.818330e-04 -1.251020e-04
## PC8         -3.732177e-05 -1.175225e-04  4.287892e-05
## PC9         -5.229907e-05 -1.344202e-04  2.982208e-05
## PC11        -5.388938e-04 -6.279221e-04 -4.498656e-04
## PC12        -5.049829e-04 -5.988052e-04 -4.111606e-04
## PC13         3.431100e-04  2.475465e-04  4.386736e-04
## PC14         2.527666e-04  1.537902e-04  3.517430e-04
## PC16         3.558417e-04  2.538653e-04  4.578182e-04
## PC17        -1.998529e-04 -3.074757e-04 -9.223015e-05
## PC18        -3.654310e-04 -4.773739e-04 -2.534881e-04
## PC19         4.317135e-05 -7.010991e-05  1.564526e-04
## PC20         4.101492e-04  2.862502e-04  5.340482e-04
## PC21         8.180626e-05 -4.747785e-05  2.110904e-04
## PC22         9.282618e-05 -1.090969e-04  2.947493e-04
## PC23         2.053985e-04 -4.390229e-05  4.546993e-04
## PC24        -7.967536e-04 -1.087088e-03 -5.064190e-04
## PC25         2.524151e-04 -7.599813e-05  5.808283e-04
## PC26         3.833125e-04  4.667886e-05  7.199461e-04
## PC27         2.715470e-04 -6.361989e-05  6.067139e-04
## PC29         3.679176e-04 -5.941506e-06  7.417766e-04
## PC32        -7.123956e-04 -1.128028e-03 -2.967633e-04
## PC33         7.039065e-04  2.795701e-04  1.128243e-03
## PC34         1.099756e-03  6.511037e-04  1.548408e-03
## PC37        -3.603607e-04 -8.610729e-04  1.403515e-04
## PC38         2.032648e-04 -3.141561e-04  7.206857e-04
## PC39        -2.080906e-04 -7.413170e-04  3.251358e-04
## PC42        -2.251949e-04 -7.889242e-04  3.385343e-04
## PC44         6.421781e-04  7.553640e-05  1.208820e-03
## PC45        -2.968457e-04 -8.659781e-04  2.722868e-04
## PC47        -4.846403e-04 -1.054791e-03  8.551069e-05
## PC49         3.431287e-04 -2.384184e-04  9.246758e-04
## PC57        -7.572567e-04 -1.370314e-03 -1.441993e-04
## PC59         9.818223e-04  3.716729e-04  1.591972e-03
## PC62        -3.745738e-04 -1.004500e-03  2.553524e-04
## PC63        -7.029549e-04 -1.328927e-03 -7.698276e-05
## PC64        -9.023538e-04 -1.535550e-03 -2.691578e-04
## PC66        -4.331374e-04 -1.073173e-03  2.068977e-04
## PC68         4.950411e-04 -1.474936e-04  1.137576e-03
## PC71         5.213597e-04 -1.282802e-04  1.171000e-03
## PC73         4.679612e-04 -1.857211e-04  1.121643e-03
## PC74        -6.580551e-04 -1.315861e-03 -2.495721e-07
## PC75        -8.829193e-04 -1.546441e-03 -2.193976e-04
## PC77         4.911264e-04 -1.697421e-04  1.151995e-03
## PC78         2.765648e-04 -3.878102e-04  9.409398e-04
## PC79         5.663015e-04 -1.042508e-04  1.236854e-03
## PC81         7.267212e-04  5.523200e-05  1.398210e-03
## PC82         4.337908e-04 -2.533935e-04  1.120975e-03
## PC83        -7.194573e-04 -1.403123e-03 -3.579173e-05
## PC84         7.996992e-04  1.137021e-04  1.485696e-03
## PC85         1.123558e-03  4.290312e-04  1.818084e-03
## PC87         1.719575e-03  1.029988e-03  2.409162e-03
## PC88        -1.139771e-03 -1.842020e-03 -4.375212e-04
## PC89        -5.395392e-04 -1.239624e-03  1.605451e-04
## PC90        -5.072518e-04 -1.208860e-03  1.943566e-04
## PC92         2.749095e-04 -4.208083e-04  9.706273e-04
## PC94        -9.179035e-04 -1.626069e-03 -2.097378e-04
## PC96        -4.613714e-04 -1.176317e-03  2.535744e-04
## PC97        -5.007501e-04 -1.211172e-03  2.096717e-04
## PC98        -4.817032e-04 -1.191269e-03  2.278624e-04
## PC99        -4.487862e-04 -1.159985e-03  2.624125e-04
## PC102       -5.760555e-04 -1.289414e-03  1.373025e-04
## PC104       -6.682706e-04 -1.388170e-03  5.162844e-05
## PC105        4.955110e-04 -2.268609e-04  1.217883e-03
## PC106        1.242436e-03  5.179929e-04  1.966879e-03
## PC107        6.103805e-04 -1.150509e-04  1.335812e-03
## PC109        5.375521e-04 -1.881497e-04  1.263254e-03
## PC110       -5.635217e-04 -1.293753e-03  1.667097e-04
## PC111       -8.082237e-04 -1.541821e-03 -7.462696e-05
## PC113        3.104138e-04 -4.253179e-04  1.046145e-03
## PC114       -7.563989e-04 -1.484119e-03 -2.867898e-05
## PC115       -1.655666e-03 -2.389151e-03 -9.221812e-04
## PC118        7.180344e-04 -2.119439e-05  1.457263e-03
## PC119       -5.373825e-04 -1.276933e-03  2.021678e-04
## PC121       -4.009325e-04 -1.143311e-03  3.414462e-04
## PC122        4.979362e-04 -2.400266e-04  1.235899e-03
## PC123       -5.413550e-04 -1.285929e-03  2.032187e-04
## PC125        4.774826e-04 -2.684608e-04  1.223426e-03
## PC128       -9.910001e-04 -1.736981e-03 -2.450190e-04
## PC130        4.081361e-04 -3.380762e-04  1.154348e-03
## PC131       -1.474609e-03 -2.219450e-03 -7.297668e-04
## PC132        3.074585e-04 -4.456296e-04  1.060547e-03
## PC134        9.756772e-04  2.242233e-04  1.727131e-03
## PC135        4.401274e-04 -3.112522e-04  1.191507e-03
## PC136        5.537986e-04 -2.028168e-04  1.310414e-03
## PC137       -7.524878e-04 -1.508038e-03  3.062572e-06
## PC138        5.529543e-04 -2.063313e-04  1.312240e-03
## PC139       -7.448016e-04 -1.499809e-03  1.020571e-05
## PC140       -3.901045e-04 -1.147415e-03  3.672062e-04
## PC141        3.924341e-04 -3.613440e-04  1.146212e-03
## PC143        3.158275e-04 -4.421895e-04  1.073844e-03
## PC144        1.024203e-03  2.633283e-04  1.785078e-03
## PC146        5.923295e-04 -1.751615e-04  1.359820e-03
## PC148       -5.130788e-04 -1.275746e-03  2.495885e-04
## PC151        6.756695e-04 -9.189504e-05  1.443234e-03
## PC152       -6.885784e-04 -1.455780e-03  7.862350e-05
## PC153        4.660142e-04 -3.072961e-04  1.239325e-03
## PC154       -8.514425e-04 -1.620867e-03 -8.201780e-05
## PC155        1.067769e-03  2.971624e-04  1.838375e-03
## PC156        1.368886e-03  5.966260e-04  2.141146e-03
## PC159        2.124070e-03  1.347256e-03  2.900884e-03
## PC161        3.374139e-04 -4.362380e-04  1.111066e-03
## PC162       -1.145268e-03 -1.925606e-03 -3.649295e-04
## PC163        6.712821e-04 -1.039883e-04  1.446553e-03
## PC164        3.014931e-04 -4.803026e-04  1.083289e-03

Test

# Evaluate the CV-selected backward-elimination model on the held-out test set.
# Reports test MSE/RMSE on the transformed scale and back-transformed to the
# original scale of the response.
# NOTE(review): `t` here is presumably a transformation object defined earlier
# in the file (it would otherwise resolve to base::t) — confirm upstream.
if (algo.backward.caret) {
  test.model(model.backward, data.test
             ,method = 'leapBackward',subopt = NULL
             ,formula = formula, feature.names = feature.names, label.names = label.names
             ,id = id
             ,draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.024   2.087   2.101   2.097   2.110   2.141 
## [1] "leapBackward  Test MSE: 0.00102089613568136"
## [1] "leapBackward  Test RMSE: 0.0319514653135244"
## [1] "leapBackward  Test MSE (Org Scale): 90.4181132605647"
## [1] "leapBackward  Test RMSE (Org Scale): 9.50884394974304"

Stepwise Selection with CV

Train

# Train a stepwise (sequential replacement) subset-selection model via caret,
# tuning nvmax by cross-validation. Seeded for reproducible fold assignment.
# Side effects: binds `model.stepwise` (fitted caret model) and `id`
# (identifier returned by the training helper, reused by the test chunk).
if (algo.stepwise.caret) {
  set.seed(1)
  returned = train.caret.glmselect(formula = formula
                                   ,data =  data.train
                                   ,method = "leapSeq"
                                   ,feature.names = feature.names)
  model.stepwise = returned$model
  id = returned$id
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 110 on full training set
## [1] "All models results"
##     nvmax       RMSE   Rsquared        MAE      RMSESD RsquaredSD        MAESD
## 1       1 0.03477291 0.08867342 0.02692328 0.001241527 0.02694524 0.0007080957
## 2       2 0.03430526 0.11280445 0.02659499 0.001382164 0.03177241 0.0008448670
## 3       3 0.03397028 0.13012834 0.02629226 0.001480367 0.03628599 0.0009228758
## 4       4 0.03368539 0.14538925 0.02606470 0.001501094 0.03730811 0.0009134034
## 5       5 0.03329810 0.16453122 0.02572753 0.001417074 0.04025876 0.0008731997
## 6       6 0.03364346 0.14762605 0.02606217 0.001507226 0.03744262 0.0009118042
## 7       7 0.03306407 0.17615661 0.02557869 0.001452171 0.03968875 0.0009114778
## 8       8 0.03295739 0.18134530 0.02547827 0.001388292 0.03792930 0.0008274437
## 9       9 0.03294086 0.18231882 0.02548121 0.001348559 0.03811940 0.0008221184
## 10     10 0.03295196 0.18193795 0.02554625 0.001782672 0.05165982 0.0011296783
## 11     11 0.03278176 0.19005293 0.02536550 0.001226074 0.03662664 0.0007614728
## 12     12 0.03262333 0.19832480 0.02528612 0.001354081 0.03543918 0.0008453433
## 13     13 0.03249855 0.20394578 0.02517899 0.001340311 0.03643932 0.0007938316
## 14     14 0.03244689 0.20662371 0.02517335 0.001343122 0.03644511 0.0008246527
## 15     15 0.03237508 0.21024129 0.02509782 0.001358417 0.03834147 0.0008435020
## 16     16 0.03229713 0.21389724 0.02502812 0.001305601 0.03482762 0.0007953255
## 17     17 0.03228024 0.21477143 0.02501865 0.001441694 0.03767126 0.0008421591
## 18     18 0.03211348 0.22278477 0.02488082 0.001329096 0.03586858 0.0008024469
## 19     19 0.03212837 0.22212585 0.02490779 0.001341751 0.03746177 0.0008003150
## 20     20 0.03213781 0.22162102 0.02492734 0.001312032 0.03662545 0.0008052886
## 21     21 0.03213292 0.22181018 0.02493871 0.001309637 0.03648529 0.0008097871
## 22     22 0.03216415 0.22035170 0.02496546 0.001314688 0.03606900 0.0007887597
## 23     23 0.03223054 0.21740961 0.02503639 0.001432144 0.03921016 0.0008294727
## 24     24 0.03221847 0.21802288 0.02499991 0.001404291 0.03862676 0.0008177595
## 25     25 0.03216149 0.22080358 0.02497778 0.001375812 0.03724828 0.0008036009
## 26     26 0.03212614 0.22258862 0.02494025 0.001418549 0.03972298 0.0008370979
## 27     27 0.03214944 0.22158627 0.02494780 0.001458076 0.04017474 0.0008355988
## 28     28 0.03211566 0.22313668 0.02492613 0.001363803 0.03747320 0.0007978509
## 29     29 0.03218994 0.21951031 0.02493448 0.001436620 0.04157301 0.0008278866
## 30     30 0.03211594 0.22319337 0.02493399 0.001400114 0.04048388 0.0008208170
## 31     31 0.03207909 0.22488225 0.02492618 0.001376529 0.03929919 0.0008131397
## 32     32 0.03206701 0.22507278 0.02491505 0.001393636 0.04100214 0.0008220187
## 33     33 0.03207480 0.22505448 0.02490972 0.001405358 0.04126198 0.0008488342
## 34     34 0.03204038 0.22659548 0.02490480 0.001410639 0.03965959 0.0008407845
## 35     35 0.03203920 0.22676001 0.02490051 0.001440626 0.04120706 0.0008483916
## 36     36 0.03205493 0.22605123 0.02491630 0.001428149 0.04018208 0.0008259982
## 37     37 0.03209247 0.22448256 0.02493880 0.001373083 0.03699118 0.0007959057
## 38     38 0.03209946 0.22392211 0.02493283 0.001392169 0.03975165 0.0008295505
## 39     39 0.03206745 0.22560525 0.02490482 0.001384284 0.03932639 0.0008207336
## 40     40 0.03212107 0.22320888 0.02490222 0.001374020 0.03771811 0.0007961591
## 41     41 0.03204194 0.22681627 0.02489759 0.001388689 0.04040147 0.0008204343
## 42     42 0.03206928 0.22555906 0.02490955 0.001403987 0.04095464 0.0008232951
## 43     43 0.03206459 0.22584693 0.02490599 0.001421581 0.04094682 0.0008400139
## 44     44 0.03208054 0.22514515 0.02491116 0.001424649 0.04083853 0.0008497526
## 45     45 0.03209622 0.22450274 0.02491986 0.001435414 0.04092379 0.0008529708
## 46     46 0.03210495 0.22363884 0.02490932 0.001442367 0.04129948 0.0008447148
## 47     47 0.03210457 0.22416884 0.02496012 0.001435905 0.04059665 0.0008571557
## 48     48 0.03215396 0.22208390 0.02496497 0.001367959 0.03540737 0.0008392124
## 49     49 0.03211062 0.22404659 0.02493303 0.001433535 0.04132750 0.0008970393
## 50     50 0.03209726 0.22436420 0.02494961 0.001416106 0.04016010 0.0008928591
## 51     51 0.03211101 0.22405961 0.02494186 0.001412430 0.03954804 0.0008834049
## 52     52 0.03217387 0.22061515 0.02498129 0.001391007 0.04071029 0.0008897043
## 53     53 0.03218264 0.22077620 0.02494981 0.001429008 0.04076332 0.0008810413
## 54     54 0.03216400 0.22198101 0.02497226 0.001382859 0.03559469 0.0008410044
## 55     55 0.03212483 0.22349935 0.02494206 0.001407913 0.03907454 0.0008742864
## 56     56 0.03211988 0.22366659 0.02493888 0.001390261 0.03823314 0.0008750261
## 57     57 0.03211959 0.22379649 0.02495186 0.001411565 0.03936397 0.0008980209
## 58     58 0.03211239 0.22416596 0.02494613 0.001415538 0.03898629 0.0008897171
## 59     59 0.03212545 0.22365908 0.02495682 0.001432631 0.03946297 0.0008790120
## 60     60 0.03212316 0.22378696 0.02494761 0.001412651 0.03888734 0.0008747153
## 61     61 0.03213229 0.22284483 0.02493115 0.001428787 0.04044992 0.0008843802
## 62     62 0.03211137 0.22419724 0.02493487 0.001442324 0.03994127 0.0008862970
## 63     63 0.03210130 0.22495859 0.02494326 0.001431823 0.04007662 0.0008924303
## 64     64 0.03212862 0.22353731 0.02493078 0.001454168 0.04170003 0.0009058972
## 65     65 0.03207369 0.22624304 0.02492354 0.001446947 0.04059217 0.0009086123
## 66     66 0.03208122 0.22592667 0.02492976 0.001445905 0.04036995 0.0009069383
## 67     67 0.03208651 0.22577119 0.02492386 0.001467726 0.04172392 0.0009210062
## 68     68 0.03208446 0.22595476 0.02492810 0.001461677 0.04126619 0.0009395816
## 69     69 0.03205910 0.22649728 0.02489462 0.001448053 0.04099376 0.0009063671
## 70     70 0.03207017 0.22661973 0.02491569 0.001471120 0.04150434 0.0009530351
## 71     71 0.03212585 0.22402256 0.02497390 0.001434907 0.03861433 0.0009287974
## 72     72 0.03207177 0.22660594 0.02492913 0.001478852 0.04139493 0.0009545841
## 73     73 0.03207884 0.22627552 0.02493485 0.001469068 0.04036586 0.0009346723
## 74     74 0.03207339 0.22659491 0.02493149 0.001463956 0.04019921 0.0009356794
## 75     75 0.03206740 0.22692688 0.02491881 0.001474248 0.04055087 0.0009335688
## 76     76 0.03209832 0.22548493 0.02495169 0.001476863 0.04066274 0.0009247455
## 77     77 0.03206180 0.22727944 0.02491406 0.001478382 0.04097980 0.0009401027
## 78     78 0.03210466 0.22514188 0.02491065 0.001492263 0.04234991 0.0009652147
## 79     79 0.03206641 0.22712476 0.02491256 0.001496181 0.04177050 0.0009683861
## 80     80 0.03213538 0.22331485 0.02494660 0.001497440 0.04525489 0.0009907187
## 81     81 0.03205316 0.22773533 0.02489855 0.001505466 0.04189693 0.0009817560
## 82     82 0.03204522 0.22810910 0.02489000 0.001515063 0.04205617 0.0009825367
## 83     83 0.03209940 0.22557559 0.02491597 0.001483113 0.04203845 0.0010088293
## 84     84 0.03205112 0.22794690 0.02488631 0.001492402 0.04020826 0.0009679658
## 85     85 0.03201806 0.22884793 0.02488286 0.001507537 0.04221983 0.0009846185
## 86     86 0.03202085 0.22917024 0.02487680 0.001505395 0.04147502 0.0009788390
## 87     87 0.03203653 0.22846550 0.02489505 0.001515744 0.04140317 0.0009708080
## 88     88 0.03200244 0.22999129 0.02485773 0.001499302 0.04104252 0.0009852717
## 89     89 0.03203843 0.22804545 0.02484371 0.001458541 0.04024568 0.0009581253
## 90     90 0.03202266 0.22898510 0.02487562 0.001450038 0.03835310 0.0009338068
## 91     91 0.03199410 0.23035360 0.02485436 0.001491790 0.04017857 0.0009754344
## 92     92 0.03199145 0.23051231 0.02485813 0.001501909 0.04064253 0.0009808718
## 93     93 0.03209082 0.22576404 0.02494425 0.001429129 0.03971036 0.0009815257
## 94     94 0.03198553 0.23027567 0.02485954 0.001473212 0.03947547 0.0009532572
## 95     95 0.03202998 0.22870485 0.02489114 0.001444778 0.03757072 0.0009341900
## 96     96 0.03199788 0.23041260 0.02485618 0.001503094 0.04026775 0.0009965283
## 97     97 0.03199814 0.23028712 0.02485950 0.001519767 0.04058028 0.0009996734
## 98     98 0.03205836 0.22734122 0.02492342 0.001465661 0.04025952 0.0009868548
## 99     99 0.03199502 0.23042438 0.02485221 0.001532208 0.04059236 0.0010076699
## 100   100 0.03203984 0.22819130 0.02486701 0.001533326 0.04152059 0.0010087978
## 101   101 0.03199256 0.23051327 0.02485819 0.001521955 0.03993837 0.0009966870
## 102   102 0.03199608 0.22989097 0.02487171 0.001511168 0.03959427 0.0009545563
## 103   103 0.03197542 0.23140292 0.02483410 0.001527545 0.04088315 0.0010061924
## 104   104 0.03197442 0.23134640 0.02484161 0.001529584 0.04039329 0.0010055524
## 105   105 0.03197723 0.23123436 0.02483659 0.001532022 0.04045956 0.0010119291
## 106   106 0.03197358 0.23140859 0.02483619 0.001533794 0.04089880 0.0010164772
## 107   107 0.03198969 0.23029103 0.02486281 0.001535890 0.04228560 0.0010146620
## 108   108 0.03197029 0.23154870 0.02483612 0.001541826 0.04123377 0.0010298468
## 109   109 0.03197142 0.23147689 0.02483488 0.001538675 0.04102166 0.0010223219
## 110   110 0.03196823 0.23159643 0.02482476 0.001529341 0.04067338 0.0010134655
## 111   111 0.03197336 0.23138431 0.02482805 0.001533506 0.04106195 0.0010142136
## 112   112 0.03197246 0.23149132 0.02482195 0.001536747 0.04117865 0.0010072769
## 113   113 0.03197274 0.23125381 0.02480490 0.001528061 0.04118519 0.0009785909
## 114   114 0.03197604 0.23132361 0.02482609 0.001528680 0.04094427 0.0010053142
## 115   115 0.03197837 0.23122685 0.02483174 0.001530312 0.04067612 0.0010056174
## 116   116 0.03197979 0.23118245 0.02483204 0.001530900 0.04075648 0.0010049239
## 117   117 0.03198423 0.23099071 0.02483741 0.001524602 0.04059462 0.0010025404
## 118   118 0.03198748 0.23086744 0.02484001 0.001524930 0.04057813 0.0009998928
## 119   119 0.03199519 0.23053784 0.02484830 0.001525864 0.04054937 0.0010003865
## 120   120 0.03197408 0.23124329 0.02480665 0.001495980 0.03990187 0.0009640899
## 121   121 0.03198885 0.23082304 0.02484123 0.001523881 0.04011096 0.0009993482
## 122   122 0.03199615 0.23052753 0.02484857 0.001524203 0.04019993 0.0009978955
## 123   123 0.03199822 0.23042646 0.02484948 0.001520157 0.04005783 0.0009911628
## 124   124 0.03200088 0.23032480 0.02485123 0.001522284 0.04027961 0.0009901903
## 125   125 0.03200034 0.23011680 0.02483202 0.001523590 0.04056994 0.0009803431
## 126   126 0.03200152 0.23009132 0.02482920 0.001524954 0.04067482 0.0009875120
## 127   127 0.03198818 0.23049727 0.02483545 0.001506655 0.04014355 0.0009768843
## 128   128 0.03200849 0.23002288 0.02485181 0.001528606 0.04074557 0.0010019670
## 129   129 0.03200626 0.23011574 0.02484972 0.001527317 0.04070563 0.0010032649
## 130   130 0.03200723 0.23010004 0.02485102 0.001527097 0.04074447 0.0010069934
## 131   131 0.03200245 0.23012813 0.02483505 0.001518456 0.04059960 0.0009949585
## 132   132 0.03200983 0.22999040 0.02485419 0.001530295 0.04084604 0.0010071048
## 133   133 0.03201344 0.22984440 0.02485941 0.001533387 0.04108508 0.0010103116
## 134   134 0.03201365 0.22984281 0.02486109 0.001534540 0.04101024 0.0010112674
## 135   135 0.03201087 0.22997089 0.02485950 0.001532274 0.04101278 0.0010095930
## 136   136 0.03200837 0.23008806 0.02485724 0.001532864 0.04107810 0.0010121125
## 137   137 0.03200963 0.23002545 0.02485927 0.001531753 0.04099892 0.0010112603
## 138   138 0.03201521 0.22978175 0.02486343 0.001529153 0.04079978 0.0010098656
## 139   139 0.03201903 0.22916043 0.02487057 0.001480527 0.03967488 0.0009688835
## 140   140 0.03201422 0.22981594 0.02486244 0.001528604 0.04084881 0.0010119257
## 141   141 0.03200274 0.23002420 0.02484646 0.001512081 0.04035938 0.0009811341
## 142   142 0.03200153 0.23021949 0.02484716 0.001512919 0.04045506 0.0010025637
## 143   143 0.03201593 0.22975229 0.02486209 0.001532447 0.04099954 0.0010146881
## 144   144 0.03203446 0.22884630 0.02487070 0.001507502 0.03968331 0.0009960091
## 145   145 0.03201745 0.22970677 0.02486172 0.001533723 0.04103160 0.0010156287
## 146   146 0.03204051 0.22858179 0.02488113 0.001533606 0.04136434 0.0010181282
## 147   147 0.03201959 0.22961148 0.02486442 0.001533643 0.04103777 0.0010153806
## 148   148 0.03203008 0.22909522 0.02487562 0.001521796 0.04074210 0.0010074981
## 149   149 0.03203274 0.22898076 0.02487742 0.001521453 0.04074008 0.0010080632
## 150   150 0.03202152 0.22954559 0.02486707 0.001536138 0.04108821 0.0010166699
## 151   151 0.03202283 0.22948672 0.02486717 0.001536222 0.04104770 0.0010162828
## 152   152 0.03202515 0.22937927 0.02487262 0.001536550 0.04104296 0.0010142180
## 153   153 0.03202280 0.22949054 0.02486656 0.001536304 0.04106531 0.0010170736
## 154   154 0.03202125 0.22955754 0.02486463 0.001536783 0.04108383 0.0010179749
## 155   155 0.03202162 0.22953372 0.02486414 0.001537131 0.04110253 0.0010177444
## 156   156 0.03202231 0.22950264 0.02486497 0.001537206 0.04114068 0.0010179990
## 157   157 0.03203734 0.22880799 0.02488339 0.001533359 0.04192871 0.0010152820
## 158   158 0.03203789 0.22878322 0.02488330 0.001533634 0.04194449 0.0010153819
## 159   159 0.03202387 0.22941793 0.02486642 0.001536021 0.04112788 0.0010202464
## 160   160 0.03201321 0.22976841 0.02486820 0.001524236 0.04057383 0.0010087807
## 161   161 0.03202871 0.22918760 0.02487485 0.001529922 0.04087070 0.0010117449
## 162   162 0.03201966 0.22958734 0.02486870 0.001534560 0.04116043 0.0010113049
## 163   163 0.03201627 0.22978243 0.02486023 0.001539102 0.04121090 0.0010182024
## 164   164 0.03202267 0.22948948 0.02486541 0.001536708 0.04113085 0.0010181546
## [1] "Best Model"
##     nvmax
## 110   110

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients of final model:"
##                  Estimate         2.5 %        97.5 %
## (Intercept)  2.096921e+00  2.096096e+00  2.097746e+00
## PC1         -4.820829e-04 -5.540102e-04 -4.101556e-04
## PC2         -9.439758e-04 -1.017022e-03 -8.709292e-04
## PC3         -4.376253e-04 -5.110635e-04 -3.641872e-04
## PC4         -3.485515e-04 -4.229921e-04 -2.741108e-04
## PC5          2.305793e-04  1.538579e-04  3.073007e-04
## PC6         -9.916849e-05 -1.759127e-04 -2.242430e-05
## PC7         -2.034675e-04 -2.818330e-04 -1.251020e-04
## PC8         -3.732177e-05 -1.175225e-04  4.287892e-05
## PC9         -5.229907e-05 -1.344202e-04  2.982208e-05
## PC11        -5.388938e-04 -6.279221e-04 -4.498656e-04
## PC12        -5.049829e-04 -5.988052e-04 -4.111606e-04
## PC13         3.431100e-04  2.475465e-04  4.386736e-04
## PC14         2.527666e-04  1.537902e-04  3.517430e-04
## PC16         3.558417e-04  2.538653e-04  4.578182e-04
## PC17        -1.998529e-04 -3.074757e-04 -9.223015e-05
## PC18        -3.654310e-04 -4.773739e-04 -2.534881e-04
## PC19         4.317135e-05 -7.010991e-05  1.564526e-04
## PC20         4.101492e-04  2.862502e-04  5.340482e-04
## PC21         8.180626e-05 -4.747785e-05  2.110904e-04
## PC22         9.282618e-05 -1.090969e-04  2.947493e-04
## PC23         2.053985e-04 -4.390229e-05  4.546993e-04
## PC24        -7.967536e-04 -1.087088e-03 -5.064190e-04
## PC25         2.524151e-04 -7.599813e-05  5.808283e-04
## PC26         3.833125e-04  4.667886e-05  7.199461e-04
## PC27         2.715470e-04 -6.361989e-05  6.067139e-04
## PC29         3.679176e-04 -5.941506e-06  7.417766e-04
## PC32        -7.123956e-04 -1.128028e-03 -2.967633e-04
## PC33         7.039065e-04  2.795701e-04  1.128243e-03
## PC34         1.099756e-03  6.511037e-04  1.548408e-03
## PC37        -3.603607e-04 -8.610729e-04  1.403515e-04
## PC38         2.032648e-04 -3.141561e-04  7.206857e-04
## PC39        -2.080906e-04 -7.413170e-04  3.251358e-04
## PC42        -2.251949e-04 -7.889242e-04  3.385343e-04
## PC44         6.421781e-04  7.553640e-05  1.208820e-03
## PC45        -2.968457e-04 -8.659781e-04  2.722868e-04
## PC47        -4.846403e-04 -1.054791e-03  8.551069e-05
## PC49         3.431287e-04 -2.384184e-04  9.246758e-04
## PC57        -7.572567e-04 -1.370314e-03 -1.441993e-04
## PC59         9.818223e-04  3.716729e-04  1.591972e-03
## PC62        -3.745738e-04 -1.004500e-03  2.553524e-04
## PC63        -7.029549e-04 -1.328927e-03 -7.698276e-05
## PC64        -9.023538e-04 -1.535550e-03 -2.691578e-04
## PC66        -4.331374e-04 -1.073173e-03  2.068977e-04
## PC68         4.950411e-04 -1.474936e-04  1.137576e-03
## PC71         5.213597e-04 -1.282802e-04  1.171000e-03
## PC73         4.679612e-04 -1.857211e-04  1.121643e-03
## PC74        -6.580551e-04 -1.315861e-03 -2.495721e-07
## PC75        -8.829193e-04 -1.546441e-03 -2.193976e-04
## PC77         4.911264e-04 -1.697421e-04  1.151995e-03
## PC78         2.765648e-04 -3.878102e-04  9.409398e-04
## PC79         5.663015e-04 -1.042508e-04  1.236854e-03
## PC81         7.267212e-04  5.523200e-05  1.398210e-03
## PC82         4.337908e-04 -2.533935e-04  1.120975e-03
## PC83        -7.194573e-04 -1.403123e-03 -3.579173e-05
## PC84         7.996992e-04  1.137021e-04  1.485696e-03
## PC85         1.123558e-03  4.290312e-04  1.818084e-03
## PC87         1.719575e-03  1.029988e-03  2.409162e-03
## PC88        -1.139771e-03 -1.842020e-03 -4.375212e-04
## PC89        -5.395392e-04 -1.239624e-03  1.605451e-04
## PC90        -5.072518e-04 -1.208860e-03  1.943566e-04
## PC92         2.749095e-04 -4.208083e-04  9.706273e-04
## PC94        -9.179035e-04 -1.626069e-03 -2.097378e-04
## PC96        -4.613714e-04 -1.176317e-03  2.535744e-04
## PC97        -5.007501e-04 -1.211172e-03  2.096717e-04
## PC98        -4.817032e-04 -1.191269e-03  2.278624e-04
## PC99        -4.487862e-04 -1.159985e-03  2.624125e-04
## PC102       -5.760555e-04 -1.289414e-03  1.373025e-04
## PC104       -6.682706e-04 -1.388170e-03  5.162844e-05
## PC105        4.955110e-04 -2.268609e-04  1.217883e-03
## PC106        1.242436e-03  5.179929e-04  1.966879e-03
## PC107        6.103805e-04 -1.150509e-04  1.335812e-03
## PC109        5.375521e-04 -1.881497e-04  1.263254e-03
## PC110       -5.635217e-04 -1.293753e-03  1.667097e-04
## PC111       -8.082237e-04 -1.541821e-03 -7.462696e-05
## PC113        3.104138e-04 -4.253179e-04  1.046145e-03
## PC114       -7.563989e-04 -1.484119e-03 -2.867898e-05
## PC115       -1.655666e-03 -2.389151e-03 -9.221812e-04
## PC118        7.180344e-04 -2.119439e-05  1.457263e-03
## PC119       -5.373825e-04 -1.276933e-03  2.021678e-04
## PC121       -4.009325e-04 -1.143311e-03  3.414462e-04
## PC122        4.979362e-04 -2.400266e-04  1.235899e-03
## PC123       -5.413550e-04 -1.285929e-03  2.032187e-04
## PC125        4.774826e-04 -2.684608e-04  1.223426e-03
## PC128       -9.910001e-04 -1.736981e-03 -2.450190e-04
## PC130        4.081361e-04 -3.380762e-04  1.154348e-03
## PC131       -1.474609e-03 -2.219450e-03 -7.297668e-04
## PC132        3.074585e-04 -4.456296e-04  1.060547e-03
## PC134        9.756772e-04  2.242233e-04  1.727131e-03
## PC135        4.401274e-04 -3.112522e-04  1.191507e-03
## PC136        5.537986e-04 -2.028168e-04  1.310414e-03
## PC137       -7.524878e-04 -1.508038e-03  3.062572e-06
## PC138        5.529543e-04 -2.063313e-04  1.312240e-03
## PC139       -7.448016e-04 -1.499809e-03  1.020571e-05
## PC140       -3.901045e-04 -1.147415e-03  3.672062e-04
## PC141        3.924341e-04 -3.613440e-04  1.146212e-03
## PC143        3.158275e-04 -4.421895e-04  1.073844e-03
## PC144        1.024203e-03  2.633283e-04  1.785078e-03
## PC146        5.923295e-04 -1.751615e-04  1.359820e-03
## PC148       -5.130788e-04 -1.275746e-03  2.495885e-04
## PC151        6.756695e-04 -9.189504e-05  1.443234e-03
## PC152       -6.885784e-04 -1.455780e-03  7.862350e-05
## PC153        4.660142e-04 -3.072961e-04  1.239325e-03
## PC154       -8.514425e-04 -1.620867e-03 -8.201780e-05
## PC155        1.067769e-03  2.971624e-04  1.838375e-03
## PC156        1.368886e-03  5.966260e-04  2.141146e-03
## PC159        2.124070e-03  1.347256e-03  2.900884e-03
## PC161        3.374139e-04 -4.362380e-04  1.111066e-03
## PC162       -1.145268e-03 -1.925606e-03 -3.649295e-04
## PC163        6.712821e-04 -1.039883e-04  1.446553e-03
## PC164        3.014931e-04 -4.803026e-04  1.083289e-03

Test

# Evaluate the CV-selected stepwise-selection model on the held-out test set.
# Reports test MSE/RMSE on the transformed scale and back-transformed to the
# original scale of the response.
# NOTE(review): `t` is presumably a transformation object defined earlier in
# the file (it would otherwise resolve to base::t) — confirm upstream.
if (algo.stepwise.caret) {
  test.model(model.stepwise, data.test
             ,method = 'leapSeq',subopt = NULL
             ,formula = formula, feature.names = feature.names, label.names = label.names
             ,id = id
             ,draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.024   2.087   2.101   2.097   2.110   2.141 
## [1] "leapSeq  Test MSE: 0.00102089613568135"
## [1] "leapSeq  Test RMSE: 0.0319514653135244"
## [1] "leapSeq  Test MSE (Org Scale): 90.4181132605642"
## [1] "leapSeq  Test RMSE (Org Scale): 9.50884394974301"

LASSO with CV

Train

# Train a LASSO model via caret/glmnet (alpha = 1 fixes the pure-L1 penalty),
# tuning lambda by cross-validation over a log-spaced grid of 100 values in
# [1e-4, 1e-2]. Seeded for reproducible fold assignment.
# Side effect: binds `model.LASSO.caret` (fitted caret model).
# NOTE(review): unlike the subset-selection chunks, `returned$id` is not
# captured here — presumably intentional for glmnet; confirm the test chunk
# does not rely on a refreshed `id`.
if (algo.LASSO.caret) {
  set.seed(1)
  tune.grid <- expand.grid(alpha = 1, lambda = 10^seq(from = -4, to = -2, length = 100))
  returned = train.caret.glmselect(formula = formula
                                   ,data =  data.train
                                   ,method = "glmnet"
                                   ,subopt = 'LASSO'
                                   ,tune.grid = tune.grid
                                   ,feature.names = feature.names)
  model.LASSO.caret = returned$model
}
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 1, lambda = 0.000242 on full training set
## glmnet 
## 
## 5584 samples
##  164 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 5026, 5026, 5026, 5025, 5025, 5026, ... 
## Resampling results across tuning parameters:
## 
##   lambda        RMSE        Rsquared    MAE       
##   0.0001000000  0.03192334  0.23256979  0.02479329
##   0.0001047616  0.03192017  0.23267315  0.02479126
##   0.0001097499  0.03191703  0.23277572  0.02478938
##   0.0001149757  0.03191385  0.23288053  0.02478753
##   0.0001204504  0.03191066  0.23298596  0.02478569
##   0.0001261857  0.03190749  0.23309164  0.02478383
##   0.0001321941  0.03190440  0.23319475  0.02478200
##   0.0001384886  0.03190140  0.23329419  0.02478012
##   0.0001450829  0.03189843  0.23339333  0.02477823
##   0.0001519911  0.03189562  0.23348725  0.02477657
##   0.0001592283  0.03189296  0.23357676  0.02477519
##   0.0001668101  0.03189018  0.23367509  0.02477367
##   0.0001747528  0.03188737  0.23377816  0.02477232
##   0.0001830738  0.03188463  0.23388173  0.02477097
##   0.0001917910  0.03188206  0.23398354  0.02476972
##   0.0002009233  0.03187977  0.23407637  0.02476863
##   0.0002104904  0.03187780  0.23415956  0.02476783
##   0.0002205131  0.03187626  0.23422798  0.02476719
##   0.0002310130  0.03187533  0.23427468  0.02476696
##   0.0002420128  0.03187494  0.23430291  0.02476707
##   0.0002535364  0.03187510  0.23431317  0.02476740
##   0.0002656088  0.03187590  0.23430039  0.02476839
##   0.0002782559  0.03187739  0.23426395  0.02476974
##   0.0002915053  0.03187977  0.23419148  0.02477165
##   0.0003053856  0.03188317  0.23407928  0.02477458
##   0.0003199267  0.03188711  0.23395152  0.02477761
##   0.0003351603  0.03189166  0.23380596  0.02478130
##   0.0003511192  0.03189713  0.23362920  0.02478560
##   0.0003678380  0.03190367  0.23341163  0.02479020
##   0.0003853529  0.03191097  0.23317238  0.02479551
##   0.0004037017  0.03191924  0.23289975  0.02480145
##   0.0004229243  0.03192820  0.23261015  0.02480729
##   0.0004430621  0.03193818  0.23228786  0.02481309
##   0.0004641589  0.03194906  0.23194155  0.02481965
##   0.0004862602  0.03196120  0.23155432  0.02482729
##   0.0005094138  0.03197351  0.23118155  0.02483491
##   0.0005336699  0.03198675  0.23078424  0.02484329
##   0.0005590810  0.03200042  0.23038950  0.02485163
##   0.0005857021  0.03201532  0.22995369  0.02486049
##   0.0006135907  0.03203109  0.22950639  0.02486990
##   0.0006428073  0.03204891  0.22898306  0.02488025
##   0.0006734151  0.03206727  0.22846153  0.02489139
##   0.0007054802  0.03208805  0.22784197  0.02490477
##   0.0007390722  0.03210996  0.22719088  0.02491979
##   0.0007742637  0.03213294  0.22651181  0.02493579
##   0.0008111308  0.03215731  0.22579164  0.02495384
##   0.0008497534  0.03218442  0.22494940  0.02497414
##   0.0008902151  0.03221414  0.22400519  0.02499562
##   0.0009326033  0.03224739  0.22289891  0.02501976
##   0.0009770100  0.03228350  0.22167081  0.02504595
##   0.0010235310  0.03232380  0.22023798  0.02507435
##   0.0010722672  0.03236547  0.21876291  0.02510399
##   0.0011233240  0.03240940  0.21718667  0.02513565
##   0.0011768120  0.03245364  0.21563813  0.02516799
##   0.0012328467  0.03249995  0.21401243  0.02520142
##   0.0012915497  0.03254912  0.21227700  0.02523695
##   0.0013530478  0.03260043  0.21046509  0.02527522
##   0.0014174742  0.03265429  0.20854376  0.02531525
##   0.0014849683  0.03271106  0.20647906  0.02535810
##   0.0015556761  0.03277112  0.20425059  0.02540337
##   0.0016297508  0.03283540  0.20179158  0.02545144
##   0.0017073526  0.03290516  0.19902730  0.02550423
##   0.0017886495  0.03298082  0.19590619  0.02556229
##   0.0018738174  0.03306077  0.19251228  0.02562364
##   0.0019630407  0.03314578  0.18877667  0.02568779
##   0.0020565123  0.03322949  0.18515876  0.02575014
##   0.0021544347  0.03331246  0.18159591  0.02580962
##   0.0022570197  0.03339381  0.17819523  0.02586751
##   0.0023644894  0.03347421  0.17488529  0.02592473
##   0.0024770764  0.03355633  0.17148879  0.02598385
##   0.0025950242  0.03363977  0.16798637  0.02604424
##   0.0027185882  0.03372235  0.16459504  0.02610526
##   0.0028480359  0.03380349  0.16130186  0.02616583
##   0.0029836472  0.03388369  0.15816282  0.02622547
##   0.0031257158  0.03396480  0.15500184  0.02628510
##   0.0032745492  0.03405060  0.15153449  0.02634950
##   0.0034304693  0.03414351  0.14749356  0.02641832
##   0.0035938137  0.03424253  0.14294475  0.02649136
##   0.0037649358  0.03434815  0.13774584  0.02657044
##   0.0039442061  0.03445578  0.13227803  0.02665017
##   0.0041320124  0.03456599  0.12638303  0.02673120
##   0.0043287613  0.03467759  0.12020088  0.02681305
##   0.0045348785  0.03479095  0.11353937  0.02689610
##   0.0047508102  0.03490180  0.10689398  0.02697584
##   0.0049770236  0.03500839  0.10024208  0.02705378
##   0.0052140083  0.03509728  0.09540295  0.02711916
##   0.0054622772  0.03517456  0.09155183  0.02717667
##   0.0057223677  0.03523679  0.08951061  0.02722314
##   0.0059948425  0.03528814  0.08887341  0.02726169
##   0.0062802914  0.03533959  0.08867342  0.02729983
##   0.0065793322  0.03539414  0.08867342  0.02733970
##   0.0068926121  0.03545391  0.08867342  0.02738302
##   0.0072208090  0.03551938  0.08867342  0.02743051
##   0.0075646333  0.03559108  0.08867342  0.02748282
##   0.0079248290  0.03566959  0.08867342  0.02753976
##   0.0083021757  0.03575554  0.08867342  0.02760157
##   0.0086974900  0.03584961  0.08867342  0.02766887
##   0.0091116276  0.03595256  0.08867342  0.02774301
##   0.0095454846  0.03606519  0.08867342  0.02782424
##   0.0100000000  0.03618838  0.08867342  0.02791214
## 
## Tuning parameter 'alpha' was held constant at a value of 1
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 0.0002420128.

##    alpha       lambda
## 20     1 0.0002420128
##     alpha       lambda       RMSE   Rsquared        MAE      RMSESD RsquaredSD        MAESD
## 1       1 0.0001000000 0.03192334 0.23256979 0.02479329 0.001501843 0.04117543 0.0009669122
## 2       1 0.0001047616 0.03192017 0.23267315 0.02479126 0.001500449 0.04117915 0.0009645980
## 3       1 0.0001097499 0.03191703 0.23277572 0.02478938 0.001499021 0.04118413 0.0009623065
## 4       1 0.0001149757 0.03191385 0.23288053 0.02478753 0.001497539 0.04119032 0.0009601090
## 5       1 0.0001204504 0.03191066 0.23298596 0.02478569 0.001495986 0.04119790 0.0009578472
## 6       1 0.0001261857 0.03190749 0.23309164 0.02478383 0.001494337 0.04120347 0.0009554591
## 7       1 0.0001321941 0.03190440 0.23319475 0.02478200 0.001492604 0.04120761 0.0009529807
## 8       1 0.0001384886 0.03190140 0.23329419 0.02478012 0.001490736 0.04120703 0.0009503189
## 9       1 0.0001450829 0.03189843 0.23339333 0.02477823 0.001488791 0.04120402 0.0009474227
## 10      1 0.0001519911 0.03189562 0.23348725 0.02477657 0.001486889 0.04120382 0.0009444715
## 11      1 0.0001592283 0.03189296 0.23357676 0.02477519 0.001484980 0.04120548 0.0009416023
## 12      1 0.0001668101 0.03189018 0.23367509 0.02477367 0.001483047 0.04121763 0.0009386812
## 13      1 0.0001747528 0.03188737 0.23377816 0.02477232 0.001480936 0.04123592 0.0009356406
## 14      1 0.0001830738 0.03188463 0.23388173 0.02477097 0.001478798 0.04126336 0.0009325507
## 15      1 0.0001917910 0.03188206 0.23398354 0.02476972 0.001476714 0.04129762 0.0009295913
## 16      1 0.0002009233 0.03187977 0.23407637 0.02476863 0.001474469 0.04132824 0.0009266080
## 17      1 0.0002104904 0.03187780 0.23415956 0.02476783 0.001472225 0.04136367 0.0009234413
## 18      1 0.0002205131 0.03187626 0.23422798 0.02476719 0.001469878 0.04140120 0.0009200294
## 19      1 0.0002310130 0.03187533 0.23427468 0.02476696 0.001467392 0.04144430 0.0009165122
## 20      1 0.0002420128 0.03187494 0.23430291 0.02476707 0.001464642 0.04147838 0.0009125592
## 21      1 0.0002535364 0.03187510 0.23431317 0.02476740 0.001461773 0.04151042 0.0009083437
## 22      1 0.0002656088 0.03187590 0.23430039 0.02476839 0.001458755 0.04153785 0.0009037501
## 23      1 0.0002782559 0.03187739 0.23426395 0.02476974 0.001455688 0.04156229 0.0008990177
## 24      1 0.0002915053 0.03187977 0.23419148 0.02477165 0.001452326 0.04156849 0.0008937243
## 25      1 0.0003053856 0.03188317 0.23407928 0.02477458 0.001448839 0.04157442 0.0008884160
## 26      1 0.0003199267 0.03188711 0.23395152 0.02477761 0.001445094 0.04157744 0.0008828587
## 27      1 0.0003351603 0.03189166 0.23380596 0.02478130 0.001441000 0.04157508 0.0008768307
## 28      1 0.0003511192 0.03189713 0.23362920 0.02478560 0.001436661 0.04156644 0.0008702125
## 29      1 0.0003678380 0.03190367 0.23341163 0.02479020 0.001431963 0.04154425 0.0008628696
## 30      1 0.0003853529 0.03191097 0.23317238 0.02479551 0.001427161 0.04151684 0.0008551571
## 31      1 0.0004037017 0.03191924 0.23289975 0.02480145 0.001422319 0.04147053 0.0008474630
## 32      1 0.0004229243 0.03192820 0.23261015 0.02480729 0.001417270 0.04141539 0.0008397799
## 33      1 0.0004430621 0.03193818 0.23228786 0.02481309 0.001412321 0.04135100 0.0008320904
## 34      1 0.0004641589 0.03194906 0.23194155 0.02481965 0.001407210 0.04129646 0.0008247388
## 35      1 0.0004862602 0.03196120 0.23155432 0.02482729 0.001401684 0.04122082 0.0008168112
## 36      1 0.0005094138 0.03197351 0.23118155 0.02483491 0.001396227 0.04115218 0.0008088982
## 37      1 0.0005336699 0.03198675 0.23078424 0.02484329 0.001390415 0.04105535 0.0008009227
## 38      1 0.0005590810 0.03200042 0.23038950 0.02485163 0.001384973 0.04097258 0.0007936120
## 39      1 0.0005857021 0.03201532 0.22995369 0.02486049 0.001379106 0.04083810 0.0007862218
## 40      1 0.0006135907 0.03203109 0.22950639 0.02486990 0.001373708 0.04071931 0.0007792893
## 41      1 0.0006428073 0.03204891 0.22898306 0.02488025 0.001368244 0.04057154 0.0007726106
## 42      1 0.0006734151 0.03206727 0.22846153 0.02489139 0.001364492 0.04051577 0.0007678790
## 43      1 0.0007054802 0.03208805 0.22784197 0.02490477 0.001361221 0.04049606 0.0007639254
## 44      1 0.0007390722 0.03210996 0.22719088 0.02491979 0.001357544 0.04044563 0.0007599225
## 45      1 0.0007742637 0.03213294 0.22651181 0.02493579 0.001352999 0.04031782 0.0007549115
## 46      1 0.0008111308 0.03215731 0.22579164 0.02495384 0.001348941 0.04018886 0.0007492863
## 47      1 0.0008497534 0.03218442 0.22494940 0.02497414 0.001344495 0.04002042 0.0007436798
## 48      1 0.0008902151 0.03221414 0.22400519 0.02499562 0.001340150 0.03986803 0.0007403559
## 49      1 0.0009326033 0.03224739 0.22289891 0.02501976 0.001336185 0.03973059 0.0007379907
## 50      1 0.0009770100 0.03228350 0.22167081 0.02504595 0.001332016 0.03957195 0.0007349972
## 51      1 0.0010235310 0.03232380 0.22023798 0.02507435 0.001327697 0.03938553 0.0007319502
## 52      1 0.0010722672 0.03236547 0.21876291 0.02510399 0.001323829 0.03925974 0.0007292941
## 53      1 0.0011233240 0.03240940 0.21718667 0.02513565 0.001319038 0.03910563 0.0007270796
## 54      1 0.0011768120 0.03245364 0.21563813 0.02516799 0.001316957 0.03915390 0.0007274704
## 55      1 0.0012328467 0.03249995 0.21401243 0.02520142 0.001315040 0.03921183 0.0007276348
## 56      1 0.0012915497 0.03254912 0.21227700 0.02523695 0.001313745 0.03934112 0.0007276793
## 57      1 0.0013530478 0.03260043 0.21046509 0.02527522 0.001312148 0.03941792 0.0007281206
## 58      1 0.0014174742 0.03265429 0.20854376 0.02531525 0.001311329 0.03951368 0.0007287620
## 59      1 0.0014849683 0.03271106 0.20647906 0.02535810 0.001311430 0.03954985 0.0007298514
## 60      1 0.0015556761 0.03277112 0.20425059 0.02540337 0.001312794 0.03964692 0.0007317700
## 61      1 0.0016297508 0.03283540 0.20179158 0.02545144 0.001314320 0.03972683 0.0007338245
## 62      1 0.0017073526 0.03290516 0.19902730 0.02550423 0.001316736 0.03989129 0.0007357228
## 63      1 0.0017886495 0.03298082 0.19590619 0.02556229 0.001318781 0.04003413 0.0007377281
## 64      1 0.0018738174 0.03306077 0.19251228 0.02562364 0.001319783 0.04010579 0.0007392430
## 65      1 0.0019630407 0.03314578 0.18877667 0.02568779 0.001320381 0.04004958 0.0007405088
## 66      1 0.0020565123 0.03322949 0.18515876 0.02575014 0.001321319 0.04016913 0.0007421136
## 67      1 0.0021544347 0.03331246 0.18159591 0.02580962 0.001319478 0.04002657 0.0007408500
## 68      1 0.0022570197 0.03339381 0.17819523 0.02586751 0.001320650 0.04017659 0.0007402892
## 69      1 0.0023644894 0.03347421 0.17488529 0.02592473 0.001320061 0.04003338 0.0007376674
## 70      1 0.0024770764 0.03355633 0.17148879 0.02598385 0.001319651 0.03999356 0.0007356338
## 71      1 0.0025950242 0.03363977 0.16798637 0.02604424 0.001313531 0.03959595 0.0007289423
## 72      1 0.0027185882 0.03372235 0.16459504 0.02610526 0.001307551 0.03943793 0.0007235330
## 73      1 0.0028480359 0.03380349 0.16130186 0.02616583 0.001296572 0.03880615 0.0007145113
## 74      1 0.0029836472 0.03388369 0.15816282 0.02622547 0.001285548 0.03830745 0.0007058147
## 75      1 0.0031257158 0.03396480 0.15500184 0.02628510 0.001273499 0.03752033 0.0006953765
## 76      1 0.0032745492 0.03405060 0.15153449 0.02634950 0.001262952 0.03679102 0.0006850448
## 77      1 0.0034304693 0.03414351 0.14749356 0.02641832 0.001252239 0.03596074 0.0006748276
## 78      1 0.0035938137 0.03424253 0.14294475 0.02649136 0.001244117 0.03535746 0.0006675857
## 79      1 0.0037649358 0.03434815 0.13774584 0.02657044 0.001238157 0.03459906 0.0006609773
## 80      1 0.0039442061 0.03445578 0.13227803 0.02665017 0.001231678 0.03404481 0.0006551871
## 81      1 0.0041320124 0.03456599 0.12638303 0.02673120 0.001221807 0.03304916 0.0006477175
## 82      1 0.0043287613 0.03467759 0.12020088 0.02681305 0.001211126 0.03227306 0.0006412668
## 83      1 0.0045348785 0.03479095 0.11353937 0.02689610 0.001194181 0.03085990 0.0006302547
## 84      1 0.0047508102 0.03490180 0.10689398 0.02697584 0.001178457 0.02996663 0.0006212900
## 85      1 0.0049770236 0.03500839 0.10024208 0.02705378 0.001160715 0.02791788 0.0006073701
## 86      1 0.0052140083 0.03509728 0.09540295 0.02711916 0.001152257 0.02769593 0.0006021656
## 87      1 0.0054622772 0.03517456 0.09155183 0.02717667 0.001140811 0.02658807 0.0005930574
## 88      1 0.0057223677 0.03523679 0.08951061 0.02722314 0.001138263 0.02713387 0.0005900847
## 89      1 0.0059948425 0.03528814 0.08887341 0.02726169 0.001135241 0.02690684 0.0005846312
## 90      1 0.0062802914 0.03533959 0.08867342 0.02729983 0.001133208 0.02694524 0.0005791697
## 91      1 0.0065793322 0.03539414 0.08867342 0.02733970 0.001129139 0.02694524 0.0005723130
## 92      1 0.0068926121 0.03545391 0.08867342 0.02738302 0.001125048 0.02694524 0.0005662155
## 93      1 0.0072208090 0.03551938 0.08867342 0.02743051 0.001120960 0.02694524 0.0005601812
## 94      1 0.0075646333 0.03559108 0.08867342 0.02748282 0.001116907 0.02694524 0.0005542403
## 95      1 0.0079248290 0.03566959 0.08867342 0.02753976 0.001112924 0.02694524 0.0005479786
## 96      1 0.0083021757 0.03575554 0.08867342 0.02760157 0.001109056 0.02694524 0.0005425216
## 97      1 0.0086974900 0.03584961 0.08867342 0.02766887 0.001105354 0.02694524 0.0005380517
## 98      1 0.0091116276 0.03595256 0.08867342 0.02774301 0.001101878 0.02694524 0.0005338495
## 99      1 0.0095454846 0.03606519 0.08867342 0.02782424 0.001098697 0.02694524 0.0005304576
## 100     1 0.0100000000 0.03618838 0.08867342 0.02791214 0.001095894 0.02694524 0.0005287606

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients"
##                model.coef
## (Intercept)  2.096897e+00
## PC1         -4.645913e-04
## PC2         -9.239349e-04
## PC3         -4.166302e-04
## PC4         -3.268518e-04
## PC5          2.077041e-04
## PC6         -7.707639e-05
## PC7         -1.779289e-04
## PC8         -1.557523e-05
## PC9         -2.837554e-05
## PC11        -5.131010e-04
## PC12        -4.759358e-04
## PC13         3.137947e-04
## PC14         2.261062e-04
## PC15        -6.529096e-06
## PC16         3.269539e-04
## PC17        -1.683571e-04
## PC18        -3.317640e-04
## PC19         8.307391e-06
## PC20         3.715853e-04
## PC21         3.692625e-05
## PC22         3.022817e-05
## PC23         1.313795e-04
## PC24        -7.069816e-04
## PC25         1.570665e-04
## PC26         2.893206e-04
## PC27         1.710490e-04
## PC29         2.576447e-04
## PC32        -5.914603e-04
## PC33         5.727051e-04
## PC34         9.771484e-04
## PC37        -2.149451e-04
## PC38         5.590158e-05
## PC39        -4.145978e-05
## PC42        -4.539262e-05
## PC44         4.711501e-04
## PC45        -1.257630e-04
## PC47        -3.273804e-04
## PC49         1.703726e-04
## PC57        -5.540830e-04
## PC59         8.058432e-04
## PC62        -1.869748e-04
## PC63        -5.226898e-04
## PC64        -7.204755e-04
## PC66        -2.269471e-04
## PC67         2.998784e-05
## PC68         3.191289e-04
## PC70         3.142512e-06
## PC71         3.370170e-04
## PC73         2.724797e-04
## PC74        -4.572800e-04
## PC75        -6.720971e-04
## PC77         2.936744e-04
## PC78         8.005685e-05
## PC79         3.747930e-04
## PC81         5.257753e-04
## PC82         2.264838e-04
## PC83        -5.218655e-04
## PC84         5.929189e-04
## PC85         9.285516e-04
## PC87         1.512129e-03
## PC88        -9.333071e-04
## PC89        -3.345004e-04
## PC90        -2.833801e-04
## PC92         4.182425e-05
## PC94        -7.278763e-04
## PC96        -2.507284e-04
## PC97        -3.099825e-04
## PC98        -2.447097e-04
## PC99        -2.276525e-04
## PC102       -3.742401e-04
## PC104       -4.572499e-04
## PC105        3.051800e-04
## PC106        1.034008e-03
## PC107        4.147117e-04
## PC109        3.091838e-04
## PC110       -3.572508e-04
## PC111       -5.961771e-04
## PC113        1.188012e-04
## PC114       -5.445481e-04
## PC115       -1.437879e-03
## PC118        4.873668e-04
## PC119       -3.017491e-04
## PC120        1.052595e-05
## PC121       -2.001836e-04
## PC122        2.978399e-04
## PC123       -3.427091e-04
## PC124        3.724332e-06
## PC125        2.597312e-04
## PC128       -7.720717e-04
## PC130        1.941239e-04
## PC131       -1.260735e-03
## PC132        8.517374e-05
## PC134        7.867958e-04
## PC135        2.208958e-04
## PC136        3.151148e-04
## PC137       -5.333841e-04
## PC138        3.446180e-04
## PC139       -5.188744e-04
## PC140       -1.482569e-04
## PC141        1.830576e-04
## PC143        9.390756e-05
## PC144        8.248566e-04
## PC146        3.533680e-04
## PC147       -5.275278e-05
## PC148       -2.979852e-04
## PC151        4.423910e-04
## PC152       -4.479116e-04
## PC153        2.465481e-04
## PC154       -6.137170e-04
## PC155        8.525282e-04
## PC156        1.145196e-03
## PC157       -2.047709e-05
## PC159        1.882328e-03
## PC160        1.484216e-06
## PC161        9.546170e-05
## PC162       -9.153109e-04
## PC163        4.515326e-04
## PC164        8.160572e-05

Test

# Evaluate the trained caret/glmnet LASSO model on the held-out test set.
# Prints a summary of predictions plus MSE/RMSE on both the transformed and
# original scales.
# NOTE(review): `transformation = t` presumably refers to a back-transform
# function defined earlier in the file (otherwise it resolves to base::t,
# the matrix transpose) — confirm against the setup section.
if (algo.LASSO.caret == TRUE) {
  test.model(
    model.LASSO.caret,
    data.test,
    method = "glmnet",
    subopt = "LASSO",
    formula = formula,
    feature.names = feature.names,
    label.names = label.names,
    draw.limits = TRUE,
    transformation = t
  )
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.030   2.088   2.101   2.097   2.109   2.138 
## [1] "glmnet LASSO Test MSE: 0.00100661928020248"
## [1] "glmnet LASSO Test RMSE: 0.0317272639886026"
## [1] "glmnet LASSO Test MSE (Org Scale): 89.2044921717936"
## [1] "glmnet LASSO Test RMSE (Org Scale): 9.44481297706808"

LARS with CV

Train

# Train a Least Angle Regression (LARS) model via the project's caret wrapper
# (10-fold CV, tuning over `fraction`); keep the fitted model in
# `model.LARS.caret` for the test step below.
if (algo.LARS.caret == TRUE) {
  set.seed(1)  # reproducible CV fold assignment
  # FIX: pass the NULL object, not the character string 'NULL', to indicate
  # "no sub-option" — consistent with the matching test.model() call for this
  # model, which already uses subopt = NULL.
  returned <- train.caret.glmselect(
    formula = formula,
    data = data.train,
    method = "lars",
    subopt = NULL,
    feature.names = feature.names
  )
  model.LARS.caret <- returned$model
}
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, : There were missing values in resampled
## performance measures.
## Aggregating results
## Selecting tuning parameters
## Fitting fraction = 0.758 on full training set
## Least Angle Regression 
## 
## 5584 samples
##  164 predictor
## 
## Pre-processing: centered (164), scaled (164) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 5026, 5026, 5026, 5025, 5025, 5026, ... 
## Resampling results across tuning parameters:
## 
##   fraction    RMSE        Rsquared    MAE       
##   0.00000000  0.03638401         NaN  0.02805055
##   0.01010101  0.03599441  0.08867342  0.02777237
##   0.02020202  0.03565622  0.08867342  0.02752900
##   0.03030303  0.03537093  0.08867342  0.02732146
##   0.04040404  0.03514541  0.09297130  0.02715425
##   0.05050505  0.03494804  0.10406469  0.02700817
##   0.06060606  0.03476279  0.11546665  0.02687491
##   0.07070707  0.03459185  0.12520383  0.02674904
##   0.08080808  0.03442977  0.13387449  0.02663084
##   0.09090909  0.03427761  0.14151172  0.02651809
##   0.10101010  0.03413141  0.14839447  0.02641044
##   0.11111111  0.03399660  0.15405935  0.02631023
##   0.12121212  0.03387507  0.15865918  0.02621943
##   0.13131313  0.03376483  0.16295499  0.02613672
##   0.14141414  0.03366009  0.16721288  0.02605904
##   0.15151515  0.03355785  0.17150886  0.02598414
##   0.16161616  0.03346270  0.17547342  0.02591396
##   0.17171717  0.03337368  0.17916176  0.02585107
##   0.18181818  0.03328970  0.18267834  0.02579186
##   0.19191919  0.03320907  0.18619829  0.02573314
##   0.20202020  0.03313013  0.18962101  0.02567406
##   0.21212121  0.03305208  0.19299024  0.02561501
##   0.22222222  0.03297544  0.19626565  0.02555641
##   0.23232323  0.03290222  0.19930212  0.02549939
##   0.24242424  0.03283328  0.20205124  0.02544630
##   0.25252525  0.03277086  0.20443367  0.02539946
##   0.26262626  0.03271318  0.20656562  0.02535604
##   0.27272727  0.03266006  0.20845257  0.02531574
##   0.28282828  0.03261007  0.21021161  0.02527831
##   0.29292929  0.03256239  0.21190845  0.02524338
##   0.30303030  0.03251829  0.21347808  0.02521220
##   0.31313131  0.03247705  0.21493031  0.02518283
##   0.32323232  0.03243863  0.21626444  0.02515525
##   0.33333333  0.03240160  0.21756069  0.02512875
##   0.34343434  0.03236462  0.21888524  0.02510257
##   0.35353535  0.03232935  0.22014500  0.02507788
##   0.36363636  0.03229681  0.22129592  0.02505457
##   0.37373737  0.03226738  0.22229827  0.02503354
##   0.38383838  0.03223978  0.22322845  0.02501411
##   0.39393939  0.03221455  0.22405349  0.02499624
##   0.40404040  0.03219165  0.22477187  0.02497981
##   0.41414141  0.03217118  0.22537874  0.02496411
##   0.42424242  0.03215213  0.22594448  0.02494976
##   0.43434343  0.03213380  0.22649635  0.02493598
##   0.44444444  0.03211653  0.22701123  0.02492359
##   0.45454545  0.03210009  0.22749697  0.02491224
##   0.46464646  0.03208479  0.22793875  0.02490233
##   0.47474747  0.03207042  0.22835279  0.02489332
##   0.48484848  0.03205664  0.22875252  0.02488502
##   0.49494949  0.03204343  0.22913837  0.02487689
##   0.50505051  0.03203130  0.22948352  0.02486994
##   0.51515152  0.03202021  0.22979125  0.02486339
##   0.52525253  0.03200948  0.23009838  0.02485709
##   0.53535354  0.03199915  0.23039871  0.02485101
##   0.54545455  0.03198926  0.23068442  0.02484489
##   0.55555556  0.03198006  0.23094622  0.02483930
##   0.56565657  0.03197082  0.23122441  0.02483349
##   0.57575758  0.03196201  0.23149356  0.02482789
##   0.58585859  0.03195312  0.23177542  0.02482226
##   0.59595960  0.03194427  0.23206588  0.02481678
##   0.60606061  0.03193626  0.23232471  0.02481211
##   0.61616162  0.03192900  0.23255713  0.02480790
##   0.62626263  0.03192181  0.23279316  0.02480331
##   0.63636364  0.03191541  0.23299719  0.02479860
##   0.64646465  0.03190908  0.23320828  0.02479390
##   0.65656566  0.03190300  0.23341524  0.02478945
##   0.66666667  0.03189757  0.23359626  0.02478572
##   0.67676768  0.03189295  0.23374443  0.02478226
##   0.68686869  0.03188875  0.23388056  0.02477893
##   0.69696970  0.03188506  0.23399838  0.02477603
##   0.70707071  0.03188144  0.23412094  0.02477318
##   0.71717172  0.03187849  0.23421803  0.02477059
##   0.72727273  0.03187639  0.23428072  0.02476888
##   0.73737374  0.03187522  0.23430512  0.02476767
##   0.74747475  0.03187464  0.23430744  0.02476691
##   0.75757576  0.03187450  0.23429679  0.02476652
##   0.76767677  0.03187512  0.23425840  0.02476657
##   0.77777778  0.03187640  0.23419784  0.02476694
##   0.78787879  0.03187835  0.23411270  0.02476774
##   0.79797980  0.03188107  0.23399998  0.02476902
##   0.80808081  0.03188434  0.23386961  0.02477058
##   0.81818182  0.03188793  0.23373481  0.02477223
##   0.82828283  0.03189169  0.23360230  0.02477411
##   0.83838384  0.03189547  0.23347609  0.02477611
##   0.84848485  0.03189992  0.23332723  0.02477885
##   0.85858586  0.03190488  0.23316267  0.02478194
##   0.86868687  0.03191047  0.23297549  0.02478526
##   0.87878788  0.03191643  0.23278076  0.02478872
##   0.88888889  0.03192286  0.23257249  0.02479272
##   0.89898990  0.03192992  0.23234283  0.02479771
##   0.90909091  0.03193743  0.23210172  0.02480310
##   0.91919192  0.03194520  0.23185698  0.02480856
##   0.92929293  0.03195351  0.23159502  0.02481454
##   0.93939394  0.03196231  0.23131923  0.02482079
##   0.94949495  0.03197149  0.23103381  0.02482740
##   0.95959596  0.03198106  0.23073919  0.02483434
##   0.96969697  0.03199101  0.23043572  0.02484165
##   0.97979798  0.03200124  0.23012631  0.02484921
##   0.98989899  0.03201172  0.22981410  0.02485714
##   1.00000000  0.03202267  0.22948948  0.02486541
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was fraction = 0.7575758.

##     fraction
## 76 0.7575758
## Warning: Removed 1 rows containing missing values (geom_point).

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients"
##           PC1           PC2           PC3           PC4           PC5           PC6           PC7           PC8 
## -5.332863e-03 -1.044467e-02 -4.680589e-03 -3.622502e-03  2.236084e-03 -8.291252e-04 -1.874068e-03 -1.605598e-04 
##           PC9          PC11          PC12          PC13          PC14          PC15          PC16          PC17 
## -2.855532e-04 -4.756169e-03 -4.184516e-03  2.708394e-03  1.885853e-03 -5.380449e-05  2.644630e-03 -1.290766e-03 
##          PC18          PC19          PC20          PC21          PC22          PC23          PC24          PC25 
## -2.446991e-03  6.079200e-05  2.475448e-03  2.361609e-04  1.237944e-04  4.353439e-04 -2.010283e-03  3.950894e-04 
##          PC26          PC27          PC29          PC32          PC33          PC34          PC37          PC38 
##  7.094747e-04  4.215485e-04  5.688791e-04 -1.175400e-03  1.114235e-03  1.798928e-03 -3.544679e-04  8.944137e-05 
##          PC39          PC42          PC44          PC45          PC47          PC49          PC57          PC59 
## -6.454831e-05 -6.676989e-05  6.869316e-04 -1.826807e-04 -4.743275e-04  2.422599e-04 -7.460410e-04  1.090068e-03 
##          PC62          PC63          PC64          PC66          PC67          PC68          PC71          PC73 
## -2.454007e-04 -6.895132e-04 -9.401858e-04 -2.930151e-04  3.909376e-05  4.104030e-04  4.284219e-04  3.446257e-04 
##          PC74          PC75          PC77          PC78          PC79          PC81          PC82          PC83 
## -5.741967e-04 -8.363334e-04  3.672726e-04  9.976832e-05  4.620840e-04  6.463820e-04  2.722015e-04 -6.306186e-04 
##          PC84          PC85          PC87          PC88          PC89          PC90          PC92          PC94 
##  7.135643e-04  1.104334e-03  1.809177e-03 -1.097415e-03 -3.947437e-04 -3.337717e-04  5.000256e-05 -8.486372e-04 
##          PC96          PC97          PC98          PC99         PC102         PC104         PC105         PC106 
## -2.897960e-04 -3.601266e-04 -2.849368e-04 -2.644878e-04 -4.330857e-04 -5.243773e-04  3.487174e-04  1.178085e-03 
##         PC107         PC109         PC110         PC111         PC113         PC114         PC115         PC118 
##  4.720819e-04  3.520929e-04 -4.040320e-04 -6.709045e-04  1.334930e-04 -6.176265e-04 -1.618772e-03  5.442466e-04 
##         PC119         PC120         PC121         PC122         PC123         PC124         PC125         PC128 
## -3.370672e-04  1.009976e-05 -2.228765e-04  3.332371e-04 -3.799905e-04  4.799659e-07  2.876962e-04 -8.542056e-04 
##         PC130         PC131         PC132         PC134         PC135         PC136         PC137         PC138 
##  2.150313e-04 -1.396784e-03  9.368906e-05  8.642841e-04  2.428342e-04  3.439469e-04 -5.825714e-04  3.750598e-04 
##         PC139         PC140         PC141         PC143         PC144         PC146         PC147         PC148 
## -5.669017e-04 -1.617705e-04  2.006277e-04  1.025875e-04  8.947662e-04  3.803750e-04 -5.735677e-05 -3.227396e-04 
##         PC151         PC152         PC153         PC154         PC155         PC156         PC157         PC159 
##  4.756667e-04 -4.821061e-04  2.634063e-04 -6.582177e-04  9.130771e-04  1.223453e-03 -2.232113e-05  1.998806e-03 
##         PC161         PC162         PC163         PC164 
##  1.021751e-04 -9.677465e-04  4.808124e-04  8.640920e-05

Test

# Evaluate the trained caret/lars model on the held-out test set.
# Prints a summary of predictions plus MSE/RMSE on both the transformed and
# original scales.
# NOTE(review): `transformation = t` presumably refers to a back-transform
# function defined earlier in the file (otherwise it resolves to base::t,
# the matrix transpose) — confirm against the setup section.
if (algo.LARS.caret == TRUE) {
  test.model(
    model.LARS.caret,
    data.test,
    method = "lars",
    subopt = NULL,
    formula = formula,
    feature.names = feature.names,
    label.names = label.names,
    draw.limits = TRUE,
    transformation = t
  )
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.030   2.088   2.101   2.097   2.109   2.138 
## [1] "lars  Test MSE: 0.00100662736899733"
## [1] "lars  Test RMSE: 0.031727391462226"
## [1] "lars  Test MSE (Org Scale): 89.2050390186953"
## [1] "lars  Test RMSE (Org Scale): 9.44484192661239"

Session Info

# Record R version, platform, locale, and attached/loaded package versions
# so the modeling results above can be reproduced with matching dependencies.
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17134)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252  LC_CTYPE=English_United States.1252    LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                           LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] bindrcpp_0.2.2             knitr_1.20                 htmltools_0.3.6            reshape2_1.4.3            
##  [5] lars_1.2                   doParallel_1.0.14          iterators_1.0.10           caret_6.0-81              
##  [9] leaps_3.0                  ggforce_0.1.3              rlist_0.4.6.1              car_3.0-2                 
## [13] carData_3.0-2              bestNormalize_1.3.0        scales_1.0.0               onewaytests_2.0           
## [17] caTools_1.17.1.1           mosaic_1.5.0               mosaicData_0.17.0          ggformula_0.9.1           
## [21] ggstance_0.3.1             lattice_0.20-35            DT_0.5                     ggiraph_0.6.0             
## [25] investr_1.4.0              glmnet_2.0-16              foreach_1.4.4              Matrix_1.2-14             
## [29] MASS_7.3-50                PerformanceAnalytics_1.5.2 xts_0.11-2                 zoo_1.8-4                 
## [33] forcats_0.3.0              stringr_1.3.1              dplyr_0.7.8                purrr_0.2.5               
## [37] readr_1.3.1                tidyr_0.8.2                tibble_1.4.2               ggplot2_3.1.0             
## [41] tidyverse_1.2.1            usdm_1.1-18                raster_2.8-4               sp_1.3-1                  
## [45] pacman_0.5.0              
## 
## loaded via a namespace (and not attached):
##  [1] readxl_1.2.0       backports_1.1.3    plyr_1.8.4         lazyeval_0.2.1     splines_3.5.1      mycor_0.1.1       
##  [7] crosstalk_1.0.0    leaflet_2.0.2      digest_0.6.18      magrittr_1.5       mosaicCore_0.6.0   openxlsx_4.1.0    
## [13] recipes_0.1.4      modelr_0.1.2       gower_0.1.2        colorspace_1.3-2   rvest_0.3.2        ggrepel_0.8.0     
## [19] haven_2.0.0        crayon_1.3.4       jsonlite_1.5       bindr_0.1.1        survival_2.42-3    glue_1.3.0        
## [25] registry_0.5       gtable_0.2.0       ppcor_1.1          ipred_0.9-8        abind_1.4-5        rngtools_1.3.1    
## [31] bibtex_0.4.2       Rcpp_1.0.0         xtable_1.8-3       units_0.6-2        foreign_0.8-70     stats4_3.5.1      
## [37] lava_1.6.4         prodlim_2018.04.18 htmlwidgets_1.3    httr_1.4.0         RColorBrewer_1.1-2 pkgconfig_2.0.2   
## [43] farver_1.1.0       nnet_7.3-12        labeling_0.3       tidyselect_0.2.5   rlang_0.3.1        later_0.7.5       
## [49] munsell_0.5.0      cellranger_1.1.0   tools_3.5.1        cli_1.0.1          generics_0.0.2     moments_0.14      
## [55] sjlabelled_1.0.17  broom_0.5.1        evaluate_0.12      ggdendro_0.1-20    yaml_2.2.0         ModelMetrics_1.2.2
## [61] zip_2.0.1          nlme_3.1-137       doRNG_1.7.1        mime_0.6           xml2_1.2.0         compiler_3.5.1    
## [67] rstudioapi_0.8     curl_3.2           tweenr_1.0.1       stringi_1.2.4      gdtools_0.1.7      pillar_1.3.1      
## [73] data.table_1.11.8  bitops_1.0-6       insight_0.1.2      httpuv_1.4.5       R6_2.3.0           promises_1.0.1    
## [79] gridExtra_2.3      rio_0.5.16         codetools_0.2-15   assertthat_0.2.0   pkgmaker_0.27      withr_2.1.2       
## [85] nortest_1.0-4      mgcv_1.8-24        hms_0.4.2          quadprog_1.5-5     grid_3.5.1         rpart_4.1-13      
## [91] timeDate_3043.102  class_7.3-14       rmarkdown_1.11     shiny_1.2.0        lubridate_1.7.4